New set function, parse a column (#4097)

- New `set` function design - takes a `Column` and works with that more easily and supports control of `Set_Mode`.
- New simple `parse` API on `Column`.
- Separated expression support for `filter` to new `filter_by_expression` on `Table`.
- New `compute` function allowing creation of a column from an expression.
- Added case sensitivity argument to the `Column` functions `starts_with`, `ends_with` and `contains`.
- Added case sensitivity argument to `Filter_Condition` for `Starts_With`, `Ends_With`, `Contains` and `Not_Contains`.
- Fixed the issue in JS Table visualisation where JavaScript date was incorrectly set.
- Some dynamic dropdown expressions - experimenting with ways to use them.
- Fixed issue with `.pretty` that wasn't escaping `\`.
- Changed default Postgres DB to `postgres`.
- Fixed SQLite support for starts_with, ends_with and contains to be consistent (using GLOB not LIKE).
This commit is contained in:
James Dunkerley 2023-01-31 20:48:16 +00:00 committed by GitHub
parent c965ad3455
commit 0790ce494f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
36 changed files with 897 additions and 452 deletions

View File

@ -220,8 +220,8 @@
API and added builders for customizing less common settings.][3516]
- [Allow control of sort direction in `First` and `Last` aggregations.][3517]
- [Implemented `Text.write`, replacing `File.write_text`.][3518]
- [Removed obsolete `select`, `group`, `sort` and releated types from tables.]
[3519]
- [Removed obsolete `select`, `group`, `sort` and related types from
tables.][3519]
- [Removed obsolete `from_xls` and `from_xlsx` functions. Added support for
reading column names from first row in `File_Format.Excel`][3523]
- [Added `File_Format.Delimited` support to `Table.write` for new files.][3528]
@ -298,6 +298,8 @@
backend.][4063]
- [Updated `Text.starts_with`, `Text.ends_with` and `Text.contains` to new
simpler API.][4078]
- [Updated `Table.set` to new API. New `Column.parse` function and added case
sensitivity to `Filter_Condition` and column functions.][4097]
[debug-shortcuts]:
https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug
@ -459,6 +461,7 @@
[4052]: https://github.com/enso-org/enso/pull/4052
[4063]: https://github.com/enso-org/enso/pull/4063
[4078]: https://github.com/enso-org/enso/pull/4078
[4097]: https://github.com/enso-org/enso/pull/4097
#### Enso Compiler
@ -590,7 +593,7 @@
[3631]: https://github.com/enso-org/enso/pull/3631
[3633]: https://github.com/enso-org/enso/pull/3633
[3637]: https://github.com/enso-org/enso/pull/3637
[3637]: https://github.com/enso-org/enso/pull/3638
[3638]: https://github.com/enso-org/enso/pull/3638
[3641]: https://github.com/enso-org/enso/pull/3641
[3658]: https://github.com/enso-org/enso/pull/3658
[3671]: https://github.com/enso-org/enso/pull/3671

View File

@ -179,14 +179,14 @@ class TableVisualization extends Visualization {
if (content instanceof Object) {
const type = content.type
if (type === 'Date') {
to_render = new Date(content.year, content.month, content.day)
to_render = new Date(content.year, content.month - 1, content.day)
.toISOString()
.substring(0, 10)
} else if (type === 'Time_Of_Day') {
const js_date = new Date(
0,
0,
0,
1,
content.hour,
content.minute,
content.second,
@ -198,7 +198,7 @@ class TableVisualization extends Visualization {
} else if (type === 'Date_Time') {
const js_date = new Date(
content.year,
content.month,
content.month - 1,
content.day,
content.hour,
content.minute,

View File

@ -1,4 +1,5 @@
import project.Any.Any
import project.Data.Text.Case_Sensitivity.Case_Sensitivity
import project.Data.Text.Extensions
import project.Data.Text.Regex
import project.Data.Text.Text
@ -7,6 +8,9 @@ import project.Nothing.Nothing
from project.Data.Boolean import all
from project.Metadata.Widget import Single_Choice
import project.Metadata.Display
polyglot java import org.enso.base.Regex_Utils
from project.Data.Filter_Condition.Filter_Condition import all
@ -39,7 +43,7 @@ type Filter_Condition
It accepts a Text value to check if the value contains it. In case of
Table operations, it can accept another column - then the corresponding
values from the source column and the provided column are checked.
Starts_With (prefix:Text)
Starts_With (prefix:Text) (case_sensitivity:Case_Sensitivity=Case_Sensitivity.Default)
## Does the value end with a suffix (Text only)?
@ -47,7 +51,7 @@ type Filter_Condition
It accepts a Text value to check if the value contains it. In case of
Table operations, it can accept another column - then the corresponding
values from the source column and the provided column are checked.
Ends_With (suffix:Text)
Ends_With (suffix:Text) (case_sensitivity:Case_Sensitivity=Case_Sensitivity.Default)
## Does the value contain the substring (Text only)?
@ -55,7 +59,7 @@ type Filter_Condition
It accepts a Text value to check if the value contains it. In case of
Table operations, it can accept another column - then the corresponding
values from the source column and the provided column are checked.
Contains (substring:Text)
Contains (substring:Text) (case_sensitivity:Case_Sensitivity=Case_Sensitivity.Default)
## Is the substring not present in the value (Text only)?
@ -63,7 +67,7 @@ type Filter_Condition
It accepts a Text value to check if the value contains it. In case of
Table operations, it can accept another column - then the corresponding
values from the source column and the provided column are checked.
Not_Contains (substring:Text)
Not_Contains (substring:Text) (case_sensitivity:Case_Sensitivity=Case_Sensitivity.Default)
## Is equal to Nothing?
Is_Nothing
@ -165,10 +169,10 @@ type Filter_Condition
Not_Equal value -> !=value
Between lower upper -> elem ->
(lower <= elem) && (elem <= upper)
Starts_With prefix -> _.starts_with prefix
Ends_With suffix -> _.ends_with suffix
Contains substring -> _.contains substring
Not_Contains substring -> v-> v.contains substring . not
Starts_With prefix case_sensitivity -> _.starts_with prefix case_sensitivity
Ends_With suffix case_sensitivity -> _.ends_with suffix case_sensitivity
Contains substring case_sensitivity -> _.contains substring case_sensitivity
Not_Contains substring case_sensitivity -> v-> v.contains substring case_sensitivity . not
Is_Nothing -> elem -> case elem of
Nothing -> True
_ -> False
@ -196,6 +200,13 @@ type Filter_Condition
Is_In values -> values.contains
Not_In values -> elem -> values.contains elem . not
## PRIVATE
Gets a widget set up for a Filter_Condition.
widget_for_filter_condition =
## values = ["(Filter_Condition.Equal to=_)", "(Filter_Condition.Not_Equal to=_)", "(Filter_Condition.Is_In values=_)", "(Filter_Condition.Not_In values=_)", "Filter_Condition.Is_True", "Filter_Condition.Is_False", "Filter_Condition.Is_Nothing", "Filter_Condition.Not_Nothing", "Filter_Condition.Is_Empty", "Filter_Condition.Not_Empty", "(Filter_Condition.Less than=_)", "(Filter_Condition.Equal_Or_Less than=_)", "(Filter_Condition.Greater than=_)", "(Filter_Condition.Equal_Or_Greater than=_)", "(Filter_Condition.Between lower=_ upper=_)", "(Filter_Condition.Starts_With prefix=_)", "(Filter_Condition.Ends_With suffix=_)", "(Filter_Condition.Contains substring=_)", "(Filter_Condition.Not_Contains substring=_)", "(Filter_Condition.Like pattern=_)", "(Filter_Condition.Not_Like pattern=_)"]
values = ["(Filter_Condition.Equal _)", "(Filter_Condition.Not_Equal _)", "(Filter_Condition.Is_In _)", "(Filter_Condition.Not_In _)", "Filter_Condition.Is_True", "Filter_Condition.Is_False", "Filter_Condition.Is_Nothing", "Filter_Condition.Not_Nothing", "Filter_Condition.Is_Empty", "Filter_Condition.Not_Empty", "(Filter_Condition.Less _)", "(Filter_Condition.Equal_Or_Less _)", "(Filter_Condition.Greater _)", "(Filter_Condition.Equal_Or_Greater _)", "(Filter_Condition.Between _ _)", "(Filter_Condition.Starts_With _)", "(Filter_Condition.Ends_With _)", "(Filter_Condition.Contains _)", "(Filter_Condition.Not_Contains _)", "(Filter_Condition.Like _)", "(Filter_Condition.Not_Like _)"]
Single_Choice values display=Display.Always
## PRIVATE
sql_like_to_regex sql_pattern =
regex_pattern = Regex_Utils.sql_like_pattern_to_regex sql_pattern

View File

@ -742,7 +742,7 @@ Text.starts_with self prefix case_sensitivity=Case_Sensitivity.Sensitive = case
Case_Sensitivity.Default -> self.starts_with prefix Case_Sensitivity.Sensitive
Case_Sensitivity.Sensitive -> Text_Utils.starts_with self prefix
Case_Sensitivity.Insensitive locale ->
self.take (Index_Sub_Range.First prefix.length) . equals_ignore_case prefix locale=locale
self.take (Index_Sub_Range.First prefix.length) . equals_ignore_case prefix locale=locale
## ALIAS Check Suffix
@ -770,7 +770,7 @@ Text.ends_with self suffix case_sensitivity=Case_Sensitivity.Sensitive = case ca
Case_Sensitivity.Default -> self.ends_with suffix Case_Sensitivity.Sensitive
Case_Sensitivity.Sensitive -> Text_Utils.ends_with self suffix
Case_Sensitivity.Insensitive locale ->
self.take (Index_Sub_Range.Last suffix.length) . equals_ignore_case suffix locale=locale
self.take (Index_Sub_Range.Last suffix.length) . equals_ignore_case suffix locale=locale
## ALIAS Contains
@ -812,7 +812,7 @@ Text.contains self term="" case_sensitivity=Case_Sensitivity.Sensitive = case ca
Case_Sensitivity.Default -> self.contains term Case_Sensitivity.Sensitive
Case_Sensitivity.Sensitive -> Text_Utils.contains self term
Case_Sensitivity.Insensitive locale ->
Text_Utils.contains_case_insensitive self term locale.java_locale
Text_Utils.contains_case_insensitive self term locale.java_locale
## Takes an integer and returns a new text, consisting of `count` concatenated
copies of `self`.

View File

@ -17,8 +17,16 @@ type Display
## Parameter is only shown on the expanded view.
Expanded_Only
type Parameter_Type
Parameter value:Text label:Text="code" parameters:(Vector Widget)=[] icon:Text=""
type Choice
## Describes an entry in a Single_Choice or Multiple_Choice widget.
Fields:
- value: The code to insert for the entry.
- label: The text to display for the entry. By default, the `value` is used.
- parameters: A list of parameters for the arguments for the `value`.
This provides the structure needed for nested widgets.
- icon: The icon to display for the entry. By default, no icon is used.
Option value:Text label:Text=value parameters:(Vector (Pair Text Widget))=[] icon:Text=""
type File_Action
## The File or Folder is for reading from.
@ -27,30 +35,40 @@ type File_Action
## The File or Folder is for writing to.
Save
type Widget
## Describe a code parameter.
Code_Input label:(Nothing|Text)=Nothing display:Display=Display.When_Modified
Code_Input label:(Nothing | Text)=Nothing display:Display=Display.When_Modified
## Describe a boolean parameter.
Boolean_Input label:Nothing|Text=Nothing display:Display=Display.When_Modified
Boolean_Input label:(Nothing | Text)=Nothing display:Display=Display.When_Modified
## Describe a numeric parameter.
Numeric_Input label:Nothing|Text=Nothing display:Display=Display.When_Modified minimum:Integer|Nothing=Nothing maximum:Integer|Nothing=Nothing step:Number=1 decimal_places:Integer=0 allow_outside:Boolean=True
Numeric_Input label:(Nothing | Text)=Nothing display:Display=Display.When_Modified minimum:Integer|Nothing=Nothing maximum:Integer|Nothing=Nothing step:Number=1 decimal_places:Integer=0 allow_outside:Boolean=True
## Describes a text widget.
Text_Input label:Nothing|Text=Nothing display:Display=Display.When_Modified quote_values:Boolean=True suggestions:(Vector Text)=[]
Text_Input label:(Nothing | Text)=Nothing display:Display=Display.When_Modified quote_values:Boolean=True suggestions:(Vector Text)=[]
## Describes a single value widget.
Single_Choice values:(Vector Parameter) label:Nothing|Text=Nothing display:Display=Display.When_Modified quote_values:Boolean=False allow_custom:Boolean=True
## Describes a single value widget (dropdown).
Fields:
- values: A list of choices to display.
If a `Text` value is used, it is treated as `Option value:Text`.
- label: The text to display for the widget.
By default, the parameter name is used.
- display: The display mode for the parameter.
- quote_values: Should the values be quoted automatically?
- allow_custom: Allow the user to enter a value not in the list?
Single_Choice values:(Vector (Choice | Text)) label:(Nothing | Text)=Nothing display:Display=Display.When_Modified quote_values:Boolean=False allow_custom:Boolean=True
## Describes a multi value widget.
Multiple_Choice values:(Vector Parameter) label:Nothing|Text=Nothing display:Display=Display.When_Modified quote_values:Boolean=False
Multiple_Choice values:(Vector (Choice | Text)) label:(Nothing | Text)=Nothing display:Display=Display.When_Modified quote_values:Boolean=False
## Describes a list editor widget.
Vector_Editor item_editor:Widget values:((Vector Parameter)|Nothing)=Nothing label:Nothing|Text=Nothing display:Display=Display.When_Modified
Vector_Editor item_editor:Widget values:((Vector (Choice | Text)) | Nothing)=Nothing label:(Nothing | Text)=Nothing display:Display=Display.When_Modified
## Describes a folder chooser.
Folder_Browse label:Nothing|Text=Nothing display:Display=Display.When_Modified
Folder_Browse label:(Nothing | Text)=Nothing display:Display=Display.When_Modified
## Describes a file chooser.
File_Browse label:Nothing|Text=Nothing display:Display=Display.When_Modified action:File_Action=File_Action.Open file_types:(Vector Pair)=[Pair "All Files" "*.*"]
File_Browse label:(Nothing | Text)=Nothing display:Display=Display.When_Modified action:File_Action=File_Action.Open file_types:(Vector Pair)=[Pair.new "All Files" "*.*"]

View File

@ -17,9 +17,13 @@ type Postgres_Options
Arguments:
- host: The hostname of the database server (defaults to localhost).
- port: The port of the database server (defaults to 5432).
- database: The database to connect to. If empty, the default database will be used.
- schema: The schema to connect to. If empty, the default schema will be used.
- credentials: The credentials to use for the connection (defaults to PGPass or No Authentication).
- database: The database to connect to. By default, it will use the
database provided in `PGDATABASE` environment variable, or if that is
not defined, it will fall back to a default database name: `postgres`.
- schema: The schema to connect to. If empty, the default schema will be
used.
- credentials: The credentials to use for the connection (defaults to
PGPass or No Authentication).
- use_ssl: Whether to use SSL (defaults to `SSL_Mode.Prefer`).
- client_cert: The client certificate to use or `Nothing` if not needed.
Postgres (host:Text=default_postgres_host) (port:Integer=default_postgres_port) (database:Text=default_postgres_database) (schema:Text="") (credentials:(Credentials|Nothing)=Nothing) (use_ssl:SSL_Mode=SSL_Mode.Prefer) (client_cert:(Client_Certificate|Nothing)=Nothing)
@ -97,4 +101,4 @@ default_postgres_port =
port -> Integer.parse port . catch Number_Parse_Error.Error (_->hardcoded_port)
## PRIVATE
default_postgres_database = Environment.get "PGDATABASE" ""
default_postgres_database = Environment.get "PGDATABASE" "postgres"

View File

@ -5,6 +5,8 @@ import Standard.Base.Error.Illegal_State.Illegal_State
from Standard.Table import Sort_Column
import Standard.Table.Data.Value_Type.Value_Type
import Standard.Table.Data.Column.Column as Materialized_Column
import Standard.Table.Data.Column_Type_Selection.Auto
import Standard.Table.Data.Value_Type.Value_Type
import project.Data.SQL_Statement.SQL_Statement
import project.Data.SQL_Type.SQL_Type
@ -16,7 +18,7 @@ import project.Internal.IR.Query.Query
from project.Data.Table import Table, freshen_columns
from project.Errors import Unsupported_Database_Operation, Integrity_Error
from project.Errors import Unsupported_Database_Operation, Integrity_Error, Unsupported_Name
type Column
@ -612,10 +614,9 @@ type Column
Arguments:
- new_name: The name to rename `self` column to.
rename : Text -> Column
rename self new_name = case Helpers.ensure_name_is_sane new_name of
True ->
Column.Value new_name self.connection self.sql_type self.expression self.context
rename : Text -> Column ! Unsupported_Name
rename self new_name = Helpers.ensure_name_is_sane new_name <|
Column.Value new_name self.connection self.sql_type self.expression self.context
## UNSTABLE
@ -669,13 +670,16 @@ type Column
Arguments:
- other: A column or text to check for each item in `self`.
- case_sensitivity: Specifies if the text values should be compared case
sensitively.
If `other` is a column, the operation is performed pairwise between
corresponding elements of `self` and `other`. If the argument is a
missing value (a Nothing or a column with missing values), the behaviour
on these missing values is vendor specific.
starts_with : Column | Text -> Column
starts_with self other = self.make_binary_op "starts_with" other new_type=SQL_Type.boolean
starts_with : Column | Text -> Case_Sensitivity -> Column
starts_with self other case_sensitivity=Case_Sensitivity.Default =
make_text_case_op self "starts_with" other case_sensitivity
## UNSTABLE
@ -683,13 +687,16 @@ type Column
Arguments:
- other: A column or text to check for each item in `self`.
- case_sensitivity: Specifies if the text values should be compared case
sensitively.
If `other` is a column, the operation is performed pairwise between
corresponding elements of `self` and `other`. If the argument is a
missing value (a Nothing or a column with missing values), the behaviour
on these missing values is vendor specific.
ends_with : Column | Text -> Column
ends_with self other = self.make_binary_op "ends_with" other new_type=SQL_Type.boolean
ends_with : Column | Text -> Case_Sensitivity -> Column
ends_with self other case_sensitivity=Case_Sensitivity.Default =
make_text_case_op self "ends_with" other case_sensitivity
## UNSTABLE
@ -697,13 +704,16 @@ type Column
Arguments:
- other: A column or text to check for each item in `self`.
- case_sensitivity: Specifies if the text values should be compared case
sensitively.
If `other` is a column, the operation is performed pairwise between
corresponding elements of `self` and `other`. If the argument is a
missing value (a Nothing or a column with missing values), the behaviour
on these missing values is vendor specific.
contains : Column | Text -> Column
contains self other = self.make_binary_op "contains" other new_type=SQL_Type.boolean
contains : Column | Text -> Case_Sensitivity -> Column
contains self other case_sensitivity=Case_Sensitivity.Default =
make_text_case_op self "contains" other case_sensitivity
## Checks for each element of the column if it matches an SQL-like pattern.
@ -780,6 +790,44 @@ type Column
new_expr = SQL_Expression.Operation "IS_IN_COLUMN" [self.expression, in_subquery, has_nulls_subquery]
Column.Value self.name self.connection SQL_Type.boolean new_expr self.context
## Parses a text column into values
Arguments:
- type: Target type to parse the value to.
If `Auto`, will attempt to deduce the type automatically.
- format: For date/time types, the format to parse the value with.
For boolean types, two values that represent true and false separated
by `|`.
- on_problems: What to do if the value cannot be parsed.
By default, an `Invalid_Format` warning is attached and the value is
set to `Nothing`.
> Example
Parse dates in a column in the format `yyyy-MM-dd` (the default format).
import Standard.Examples
example_parse = Examples.text_column_1.parse Date
> Example
Parse dates in a column in the format `dd/MM/yyyy`
import Standard.Examples
example_parse = Examples.text_column_1.parse Date 'dd/MM/yyyy'
> Example
Parse a Yes/No column into a boolean column.
import Standard.Examples
example_parse = Examples.text_column_1.parse Boolean 'Yes|No'
@type (Single_Choice ['Auto','Integer','Decimal','Date','Date_Time','Time_Of_Day','Boolean'] display=Display.Always)
parse : (Auto|Integer|Decimal|Date|Date_Time|Time_Of_Day|Boolean) -> Text | Nothing -> Problem_Behavior -> Column
parse self type=Auto format=Nothing on_problems=Report_Warning =
_ = [type, format, on_problems]
Error.throw <| Unsupported_Database_Operation.Error "`Column.parse` is not implemented yet for the Database backends."
## PRIVATE
as_internal : Internal_Column
as_internal self = Internal_Column.Value self.name self.sql_type self.expression
@ -799,3 +847,15 @@ get_approximate_type value default = case value of
_ : Column -> value.sql_type
Nothing -> default
_ -> SQL_Type.approximate_type value
## PRIVATE
Helper for case sensitivity based text operations.
make_text_case_op left op other case_sensitivity =
Value_Type.expect_text left.value_type <| case case_sensitivity of
Case_Sensitivity.Default -> left.make_binary_op op other new_type=SQL_Type.boolean
Case_Sensitivity.Sensitive ->
cs_other = if other.is_a Column then other.make_unary_op "MAKE_CASE_SENSITIVE" else other
left.make_unary_op "MAKE_CASE_SENSITIVE" . make_binary_op op cs_other new_type=SQL_Type.boolean
Case_Sensitivity.Insensitive locale -> Helpers.assume_default_locale locale <|
ci_other = if other.is_a Column then other.make_unary_op "FOLD_CASE" else other.to_case Case.Lower
left.make_unary_op "FOLD_CASE" . make_binary_op op ci_other new_type=SQL_Type.boolean

View File

@ -11,6 +11,7 @@ import Standard.Base.Error.Unimplemented.Unimplemented
from Standard.Base.Metadata.Widget import Single_Choice
import Standard.Base.Metadata.Display
from Standard.Table import Auto_Detect, Aggregate_Column, Data_Formatter, Column_Name_Mapping, Column_Selector, Sort_Column_Selector, Sort_Column, Match_Columns, Position, Set_Mode
from Standard.Table import Auto_Detect, Aggregate_Column, Data_Formatter, Column_Name_Mapping, Column_Selector, Sort_Column, Match_Columns, Position
import Standard.Table.Data.Column_Type_Selection.Column_Type_Selection
import Standard.Table.Data.Expression.Expression
@ -20,6 +21,8 @@ import Standard.Table.Data.Join_Kind.Join_Kind
import Standard.Table.Data.Report_Unmatched.Report_Unmatched
import Standard.Table.Data.Row.Row
import Standard.Table.Data.Table.Table as Materialized_Table
import Standard.Table.Data.Value_Type.Value_Type
import Standard.Table.Internal.Java_Exports
import Standard.Table.Internal.Table_Helpers
import Standard.Table.Internal.Table_Helpers.Table_Column_Helper
import Standard.Table.Internal.Problem_Builder.Problem_Builder
@ -27,7 +30,7 @@ import Standard.Table.Internal.Aggregate_Column_Helper
from Standard.Table.Data.Column import get_item_string
from Standard.Table.Data.Table import print_table
from Standard.Table.Internal.Filter_Condition_Helpers import make_filter_column
from Standard.Table.Errors import Column_Count_Mismatch, No_Index_Set_Error, No_Such_Column, No_Input_Columns_Selected, No_Output_Columns
from Standard.Table.Errors import Column_Count_Mismatch, No_Index_Set_Error, No_Such_Column, No_Input_Columns_Selected, No_Output_Columns, Existing_Column, Missing_Column
import project.Data.Column.Column
import project.Data.SQL_Statement.SQL_Statement
@ -42,7 +45,7 @@ import project.Internal.IR.Internal_Column.Internal_Column
import project.Internal.IR.SQL_Join_Kind.SQL_Join_Kind
import project.Internal.IR.Query.Query
from Standard.Database.Errors import Unsupported_Database_Operation, Integrity_Error
from project.Errors import Unsupported_Database_Operation, Integrity_Error, Unsupported_Name
import project.Connection.Connection.Connection
polyglot java import java.sql.JDBCType
@ -427,16 +430,27 @@ type Table
on_problems.handle_errors fallback=self.with_no_rows <|
mask (make_filter_column column filter)
_ : Function -> Error.throw (Unsupported_Database_Operation.Error "Filtering with a custom predicate is not supported in the database.")
_ : Text ->
_ ->
table_at = self.at column
if table_at.is_error.not then self.filter table_at filter on_problems else
expression = self.evaluate column
if expression.is_error.not then self.filter expression filter on_problems else
pick_error = expression.catch Expression_Error.Syntax_Error (_->table_at)
on_problems.handle_errors pick_error fallback=self
_ -> case on_problems.handle_errors (self.at column) fallback=Nothing of
Nothing -> self
resolved_column -> self.filter resolved_column filter on_problems
self.filter table_at filter on_problems
## ALIAS Filter Rows
Selects only the rows of this table for which the given `expression`
evaluates to `True`. The expression must evaluate to a boolean value.
Arguments:
- expression: The expression to evaluate to filter the rows.
> Example
Select people celebrating a jubilee.
people.filter_by_expression "[age] % 10 == 0"
filter_by_expression : Text -> Problem_Behavior -> Table
filter_by_expression self expression on_problems=Report_Warning =
column = self.compute expression
if column.value_type != Value_Type.Boolean then Error.throw (Illegal_Argument.Error "Expression must evaluate to a boolean value.") else
self.filter column on_problems=on_problems
## PRIVATE
with_no_rows self =
@ -511,36 +525,53 @@ type Table
Sets the column value at the given name.
Arguments:
- name: The name of the column to set.
- column: The new value for the column called `name`.
- column: The new column or expression to create column.
- new_name: Optional new name for the column.
- set_mode: Specifies how to handle the new column.
If `Add_Or_Update`, the column will be added if not present, updated
if present. If `Add`, the column will be added if not present, error
if already present. If `Update`, the column will be updated if
present, error if missing.
- on_problems: Specifies how to handle problems with expression
evaluation.
set : Column | Text -> Text | Nothing -> Set_Mode -> Problem_Behavior -> Table ! Unsupported_Name | Existing_Column | Missing_Column
set self column new_name=Nothing set_mode=Set_Mode.Add_Or_Update on_problems=Report_Warning =
resolved = case column of
_ : Text -> self.compute column on_problems
_ -> column
renamed = if new_name.is_nothing then resolved else resolved.rename new_name
If a column with the given name already exists, it will be replaced.
Otherwise a new column is added.
set : Text -> Column | Text -> Problem_Behavior -> Table
set self name column on_problems=Report_Warning = on_problems.handle_errors fallback=self <|
case Helpers.ensure_name_is_sane name of
True ->
resolved = case column of
_ : Text -> self.evaluate column
_ -> column
new_col = Internal_Column.Value name resolved.sql_type resolved.expression
replace = self.internal_columns.any (c -> c.name == name)
case replace of
True ->
new_cols = self.internal_columns.map (c -> if c.name == name then new_col else c)
self.updated_columns new_cols
False ->
self.updated_columns (self.internal_columns + [new_col])
Helpers.ensure_name_is_sane renamed.name <|
index = self.internal_columns.index_of (c -> c.name == renamed.name)
to_add = case set_mode of
Set_Mode.Add_Or_Update -> True
Set_Mode.Add -> if index.is_nothing then True else Error.throw (Existing_Column.Error renamed.name)
Set_Mode.Update -> if index.is_nothing then Error.throw (Missing_Column.Error renamed.name) else True
if to_add then
new_col = renamed.as_internal
new_cols = if index.is_nothing then self.internal_columns + [new_col] else
Vector.new self.column_count i-> if i == index then new_col else self.internal_columns.at i
self.updated_columns new_cols
## PRIVATE
evaluate : Text -> Column
evaluate self expression =
## Given an expression, create a derived column where each value is the
result of evaluating the expression for the row.
Arguments:
- expression: The expression to evaluate.
- on_problems: Specifies how to handle problems with expression
evaluation.
compute : Text -> Problem_Behavior -> Column
compute self expression on_problems=Report_Warning =
get_column name = self.at name
make_constant value =
new_type = SQL_Type.approximate_type value
other = SQL_Expression.Constant new_type value
Column.Value ("Constant_" + UUID.randomUUID.to_text) self.connection new_type other self.context
Expression.evaluate expression get_column make_constant "Standard.Database.Data.Column" "Column" Column.var_args_functions
new_column = Expression.evaluate expression get_column make_constant "Standard.Database.Data.Column" "Column" Column.var_args_functions
if new_column.is_error then new_column else
warnings = Warning.get_all new_column
rename = new_column.rename (expression.replace "[^A-Za-z_0-9]" "_" matcher=Regex_Matcher.Value)
on_problems.attach_problems_after (Warning.set rename []) warnings
## UNSTABLE
@ -725,6 +756,7 @@ type Table
allows to join the two tables on equality of corresponding columns with
the same name. So `table.join other on=["A", "B"]` is a shorthand for:
table.join other on=[Join_Condition.Equals "A" "A", Join_Condition.Equals "B" "B"]
@join_kind (Single_Choice ["Join_Kind.Inner", "Join_Kind.Left_Outer", "Join_Kind.Right_Outer", "Join_Kind.Full", "Join_Kind.Left_Exclusive", "Join_Kind.Right_Exclusive"])
join : Table -> Join_Kind -> Join_Condition | Text | Vector (Join_Condition | Text) -> Text -> Problem_Behavior -> Table
join self right join_kind=Join_Kind.Inner on=[Join_Condition.Equals 0 0] right_prefix="Right_" on_problems=Report_Warning =
can_proceed = if Table_Helpers.is_table right . not then Error.throw (Type_Error.Error Table right "right") else

View File

@ -55,17 +55,17 @@ unify_vector_singleton x = case x of
Arguments:
- name: The name to check for safety.
- action: The action to perform if the name is safe.
In a future version we will decouple the internal SQL-safe names from the
external names shown to the user, but as a temporary solution we only allow
SQL-safe names for columns.
# TODO [RW] better name handling in Tables (#1513)
ensure_name_is_sane : Text -> Boolean ! Unsupported_Name
ensure_name_is_sane name =
is_safe =
Pattern.matches "[A-Za-z_0-9]+" name
if is_safe then True else
ensure_name_is_sane : Text -> (Any -> Any) -> Any ! Unsupported_Name
ensure_name_is_sane name ~action =
is_safe = Pattern.matches "[A-Za-z_0-9]+" name
if is_safe then action else
Error.throw <| Unsupported_Name.Error (name + " is not a valid name for a column. Please use english letters, numbers and underscore only.")
## PRIVATE

View File

@ -1,6 +1,9 @@
from Standard.Base import all
import Standard.Base.Error.Illegal_State.Illegal_State
from Standard.Base.Metadata.Widget import Single_Choice
import Standard.Base.Metadata.Display
import Standard.Table.Data.Table.Table as Materialized_Table
import project.Connection.Connection.Connection
@ -48,6 +51,7 @@ type Postgres_Connection
Arguments:
- database: The name of the database to connect to.
@database (self-> Single_Choice display=Display.Always values=(self.databases . map .pretty))
set_database : Text -> Connection ! SQL_Error
set_database self database =
if database == self.database then self else
@ -65,6 +69,7 @@ type Postgres_Connection
Arguments:
- schema: The name of the schema to connect to.
@schema (self-> Single_Choice display=Display.Always values=(self.schemas . map .pretty))
set_schema : Text -> Connection ! SQL_Error
set_schema self schema =
if schema == self.schema then self else
@ -82,6 +87,7 @@ type Postgres_Connection
- schema: The schema name to search in (defaults to current schema).
- types: The table types to search for. The list of values can be obtained using the `table_types` method.
- all_fields: Return all the fields in the metadata table.
@types (self-> Single_Choice values=(self.table_types.map .pretty))
tables : Text -> Text -> Text -> Vector -> Boolean -> Materialized_Table
tables self name_like=Nothing database=self.database schema=self.schema types=Nothing all_fields=False =
self.connection.tables name_like database schema types all_fields
@ -92,6 +98,7 @@ type Postgres_Connection
- query: name of the table or sql statement to query.
If supplied as `Text`, the name is checked against the `tables` list to determine if it is a table or a query.
- alias: optionally specify a friendly alias for the query.
@query (self-> Single_Choice display=Display.Always values=(self.tables.at "Name" . to_vector . map .pretty))
query : Text | SQL_Query -> Text -> Database_Table
query self query alias="" = self.connection.query query alias
@ -101,6 +108,7 @@ type Postgres_Connection
- query: name of the table or sql statement to query.
If supplied as `Text`, the name is checked against the `tables` list to determine if it is a table or a query.
- limit: the maximum number of rows to return.
@query (self-> Single_Choice display=Display.Always values=(self.tables.at "Name" . to_vector . map .pretty))
read : Text | SQL_Query -> Integer | Nothing -> Materialized_Table
read self query limit=Nothing = self.connection.read query limit

View File

@ -94,7 +94,7 @@ type Postgres_Dialect
## PRIVATE
make_internal_generator_dialect =
cases = [["LOWER", Base_Generator.make_function "LOWER"], ["UPPER", Base_Generator.make_function "UPPER"]]
text = [starts_with, contains, ends_with, agg_shortest, agg_longest]+concat_ops+cases
text = [starts_with, contains, ends_with, agg_shortest, agg_longest, make_case_sensitive]+concat_ops+cases
counts = [agg_count_is_null, agg_count_empty, agg_count_not_empty, ["COUNT_DISTINCT", agg_count_distinct], ["COUNT_DISTINCT_INCLUDE_NULL", agg_count_distinct_include_null]]
arith_extensions = [is_nan, decimal_div]
bool = [bool_or]
@ -242,7 +242,7 @@ agg_count_distinct_include_null args =
## PRIVATE
starts_with = Base_Generator.lift_binary_op "starts_with" str-> sub->
res = str ++ " LIKE CONCAT(" ++ sub ++ ", '%')"
res = code "starts_with(" ++ str ++ "," ++ sub ++ ")"
res.paren
## PRIVATE
@ -252,7 +252,7 @@ ends_with = Base_Generator.lift_binary_op "ends_with" str-> sub->
## PRIVATE
make_case_sensitive = Base_Generator.lift_unary_op "MAKE_CASE_SENSITIVE" arg->
code "((" ++ arg ++ ') COLLATE "C.utf8")'
code "((" ++ arg ++ ') COLLATE "ucs_basic")'
## PRIVATE
make_contains_expr expr substring =

View File

@ -44,6 +44,7 @@ type SQLite_Connection
Arguments:
- database: The name of the database to connect to.
@database (Single_Choice display=Display.Always values=['Nothing'])
set_database : Text -> Connection ! SQL_Error
set_database self database =
if database == self.database then self else
@ -61,6 +62,7 @@ type SQLite_Connection
Arguments:
- schema: The name of the schema to connect to.
@schema (Single_Choice display=Display.Always values=['Nothing'])
set_schema : Text -> Connection ! SQL_Error
set_schema self schema =
if schema == self.schema then self else

View File

@ -112,7 +112,7 @@ type SQLite_Dialect
## PRIVATE
make_internal_generator_dialect =
text = [starts_with, contains, ends_with, fold_case, make_case_sensitive]+concat_ops
text = [starts_with, contains, ends_with, make_case_sensitive]+concat_ops
counts = [agg_count_is_null, agg_count_empty, agg_count_not_empty, ["COUNT_DISTINCT", agg_count_distinct], ["COUNT_DISTINCT_INCLUDE_NULL", agg_count_distinct_include_null]]
stats = [agg_stddev_pop, agg_stddev_samp]
arith_extensions = [decimal_div]
@ -227,18 +227,14 @@ agg_count_distinct_include_null args = case args.length == 1 of
## PRIVATE
starts_with = Base_Generator.lift_binary_op "starts_with" str-> sub->
res = str ++ " LIKE (" ++ sub ++ " || '%')"
res = str ++ " GLOB (" ++ sub ++ " || '*')"
res.paren
## PRIVATE
ends_with = Base_Generator.lift_binary_op "ends_with" str-> sub->
res = str ++ " LIKE ('%' || " ++ sub ++ ")"
res = str ++ " GLOB ('*' || " ++ sub ++ ")"
res.paren
## PRIVATE
fold_case = Base_Generator.lift_unary_op "FOLD_CASE" arg->
code "((" ++ arg ++ ") COLLATE NOCASE)"
## PRIVATE
make_case_sensitive = Base_Generator.lift_unary_op "MAKE_CASE_SENSITIVE" arg->
code "((" ++ arg ++ ") COLLATE BINARY)"

View File

@ -7,11 +7,17 @@ import Standard.Base.Error.Illegal_State.Illegal_State
import Standard.Base.Data.Index_Sub_Range as Index_Sub_Range_Module
import project.Data.Table.Table
import project.Data.Storage.Storage
import project.Data.Value_Type.Value_Type
from project.Data.Table import print_table
from Standard.Base.Metadata.Widget import Single_Choice
import Standard.Base.Metadata.Display
import project.Data.Column_Type_Selection.Auto
import project.Data.Data_Formatter.Data_Formatter
import project.Data.Storage.Storage
import project.Data.Table.Table
import project.Data.Value_Type.Value_Type
import project.Internal.Parse_Values_Helper
from project.Data.Table import print_table
from project.Errors import No_Index_Set_Error
polyglot java import org.enso.table.data.column.storage.Storage as Java_Storage
@ -715,6 +721,8 @@ type Column
- other: The value to compare `self` with. If `other` is a column, the
operation is performed pairwise between corresponding elements of
`self` and `other`.
- case_sensitivity: Specifies if the text values should be compared case
sensitively.
> Example
Check the elements of a column for starting with the elements of
@ -731,9 +739,16 @@ type Column
import Standard.Examples
example_starts_with = Examples.text_column_1.starts_with "hell"
starts_with : Column | Text -> Column
starts_with self other =
run_vectorized_binary_op self "starts_with" (a -> b -> a.starts_with b) other
> Example
Check the elements of a column for starting with a value comparing case insensitively.
import Standard.Examples
example_starts_with = Examples.text_column_1.starts_with "hell" Case_Sensitivity.Insensitive
starts_with : Column | Text -> Case_Sensitivity -> Column
starts_with self other case_sensitivity=Case_Sensitivity.Default =
run_vectorized_binary_case_text_op self Java_Storage.Maps.STARTS_WITH other case_sensitivity (a -> b -> a.starts_with b case_sensitivity)
## Checks for each element of the column if it ends with `other`.
@ -741,6 +756,8 @@ type Column
- other: The value to compare `self` with. If `other` is a column, the
operation is performed pairwise between corresponding elements of
`self` and `other`.
- case_sensitivity: Specifies if the text values should be compared case
sensitively.
> Example
Check the elements of a column for ending with the elements of another
@ -757,9 +774,9 @@ type Column
import Standard.Examples
example_ends_with = Examples.text_column_1.ends_with "hell"
ends_with : Column | Text -> Column
ends_with self other =
run_vectorized_binary_op self "ends_with" (a -> b -> a.ends_with b) other
ends_with : Column | Text -> Case_Sensitivity -> Column
ends_with self other case_sensitivity=Case_Sensitivity.Default =
run_vectorized_binary_case_text_op self Java_Storage.Maps.ENDS_WITH other case_sensitivity (a -> b -> a.ends_with b case_sensitivity)
## Checks for each element of the column if it contains `other`.
@ -767,6 +784,8 @@ type Column
- other: The value to compare `self` with. If `other` is a column, the
operation is performed pairwise between corresponding elements of
`self` and `other`.
- case_sensitivity: Specifies if the text values should be compared case
sensitively.
> Example
Check the elements of a column for containing the elements of another
@ -784,8 +803,8 @@ type Column
example_contains = Examples.text_column_1.contains "hell"
contains : Column | Text -> Case_Sensitivity -> Column
contains self other =
run_vectorized_binary_op self "contains" (a -> b -> a.contains b) other
contains self other case_sensitivity=Case_Sensitivity.Default =
run_vectorized_binary_case_text_op self Java_Storage.Maps.CONTAINS other case_sensitivity (a -> b -> a.contains b case_sensitivity)
## Checks for each element of the column if it matches an SQL-like pattern.
@ -849,6 +868,66 @@ type Column
new_vector = self.to_vector.map (Filter_Condition.Is_In true_vector).to_predicate
Column.from_vector result_name new_vector
## Parses a text column into values of the requested type.
Arguments:
- type: Target type to parse the value to.
If `Auto`, will attempt to deduce the type automatically.
- format: For date/time types, the format to parse the value with.
For boolean types, two values that represent true and false separated
by `|`.
Specifying a `format` together with `Auto`, `Integer` or `Decimal`
results in an `Illegal_Argument` error, as does a boolean `format` that
does not split into exactly two values.
- on_problems: What to do if the value cannot be parsed.
By default, an `Invalid_Format` warning is attached and the value is
set to `Nothing`.
> Example
Parse dates in a column in the format `yyyy-MM-dd` (the default format).
import Standard.Examples
example_parse = Examples.text_column_1.parse Date
> Example
Parse dates in a column in the format `dd/MM/yyyy`
import Standard.Examples
example_parse = Examples.text_column_1.parse Date 'dd/MM/yyyy'
> Example
Parse a Yes/No column into a boolean column.
import Standard.Examples
example_parse = Examples.text_column_1.parse Boolean 'Yes|No'
@type (Single_Choice ['Auto','Integer','Decimal','Date','Date_Time','Time_Of_Day','Boolean'] display=Display.Always)
parse : (Auto|Integer|Decimal|Date|Date_Time|Time_Of_Day|Boolean) -> Text | Nothing -> Problem_Behavior -> Column
parse self type=Auto format=Nothing on_problems=Report_Warning =
# Guard: only the listed target types are accepted; anything else is an `Illegal_Argument`.
ensure_type ~fn = if [Auto, Integer, Decimal, Date, Date_Time, Time_Of_Day, Boolean].index_of type == Nothing then Error.throw (Illegal_Argument.Error "Unsupported target type "+type.to_text+".") else fn
Value_Type.expect_text self.value_type <| ensure_type <|
# Without a `format` the default `Data_Formatter.Value` is used; otherwise it is customised per target type.
formatter = if format.is_nothing then Data_Formatter.Value else case type of
Auto -> Error.throw (Illegal_Argument.Error "Cannot specify a `format` with type `Auto`.")
Integer -> Error.throw (Illegal_Argument.Error "Cannot specify a `format` with type `Integer`.")
Decimal -> Error.throw (Illegal_Argument.Error "Cannot specify a `format` with type `Decimal`.")
Date -> Data_Formatter.Value.with_datetime_formats date_formats=[format]
Date_Time -> Data_Formatter.Value.with_datetime_formats datetime_formats=[format]
Time_Of_Day -> Data_Formatter.Value.with_datetime_formats time_formats=[format]
Boolean ->
formats = format.split "|"
if formats.length != 2 then Error.throw (Illegal_Argument.Error "The `format` for Booleans must be a string with two values separated by `|`, for example: 'Yes|No'.") else
Data_Formatter.Value.with_boolean_values true_values=[formats.at 0] false_values=[formats.at 1]
# `Auto` uses the formatter's auto parser; every other type gets a parser for that specific type.
parser = if type == Auto then formatter.make_auto_parser else formatter.make_datatype_parser type
storage = self.java_column.getStorage
new_storage_and_problems = parser.parseColumn self.name storage
new_storage = new_storage_and_problems.value
# Convert the polyglot problem array into Enso problems so that `on_problems` can handle them.
problems = Vector.from_polyglot_array new_storage_and_problems.problems . map (Parse_Values_Helper.translate_parsing_problem type)
output = Column.Value (Java_Column.new self.name new_storage)
on_problems.attach_problems_after output problems
## ALIAS Transform Column
Applies `function` to each item in this column and returns the column
@ -1361,3 +1440,18 @@ make_storage_builder_for_type value_type initial_size=128 =
Value_Type.Mixed -> Storage.Any
_ -> Storage.Any
Storage.make_builder closest_storage_type initial_size
## PRIVATE
Helper for case-sensitivity-aware text operations on a column.
Arguments:
- left: The column the operation is applied to. Its value type is checked
with `Value_Type.expect_text` before anything else is done.
- op: The operation identifier handed to `run_vectorized_binary_op`.
- other: The value or column to compare `left` with.
- case_sensitivity: Selects the dispatch. `Default` and `Sensitive` pass
`op` through unchanged; `Insensitive` passes `Nothing` in its place.
- fallback: The function handed to `run_vectorized_binary_op` alongside
`op`.
run_vectorized_binary_case_text_op : Column -> Text -> (Text | Column) -> Case_Sensitivity -> (Text -> Text -> Any) -> Column
run_vectorized_binary_case_text_op left op other case_sensitivity fallback =
Value_Type.expect_text left.value_type <| case case_sensitivity of
Case_Sensitivity.Default -> run_vectorized_binary_op left op fallback other
Case_Sensitivity.Sensitive -> run_vectorized_binary_op left op fallback other
Case_Sensitivity.Insensitive _ ->
## TODO currently this always runs the fallback which is slow due to the
cost of Java-to-Enso calls. We want to have a vectorized
implementation, but we need to extend the architecture to allow
passing the locale to it.
See: https://www.pivotaltracker.com/n/projects/2539304/stories/184093260
run_vectorized_binary_op left Nothing fallback other

View File

@ -0,0 +1,11 @@
## Specifies how a column is added to a table, depending on whether a column with that name is already present.
type Set_Mode
## Add a new column to the table.
If already present an `Existing_Column` error is returned.
Add
## Update the column in the table.
If not present a `Missing_Column` error is returned.
Update
## Add the column to the table if not present, or update if present.
Add_Or_Update

View File

@ -13,6 +13,7 @@ import Standard.Base.Error.Unimplemented.Unimplemented
from Standard.Base.Metadata.Widget import Single_Choice
import Standard.Base.Metadata.Display
import project.Data.Aggregate_Column.Aggregate_Column
import project.Data.Column.Column
import project.Data.Column as Column_Module
import project.Data.Column_Name_Mapping.Column_Name_Mapping
@ -25,11 +26,10 @@ import project.Data.Match_Columns as Match_Columns_Helpers
import project.Data.Position.Position
import project.Data.Report_Unmatched.Report_Unmatched
import project.Data.Row.Row
import project.Data.Set_Mode.Set_Mode
import project.Data.Sort_Column.Sort_Column
import project.Data.Storage.Storage
import project.Data.Value_Type.Value_Type
import project.Data.Sort_Column.Sort_Column
import project.Data.Aggregate_Column.Aggregate_Column
import project.Data.Storage.Storage
import project.Internal.Aggregate_Column_Helper
import project.Internal.Java_Problems
import project.Internal.Join_Helpers
@ -44,7 +44,7 @@ import project.Delimited.Delimited_Format.Delimited_Format
from project.Data.Column_Type_Selection import Column_Type_Selection, Auto
from project.Internal.Rows_View import Rows_View
from project.Errors import Column_Count_Mismatch, Missing_Input_Columns, Column_Indexes_Out_Of_Range, Duplicate_Type_Selector, No_Index_Set_Error, No_Such_Column, No_Input_Columns_Selected, No_Output_Columns, Invalid_Value_Type, Cross_Join_Row_Limit_Exceeded, Row_Count_Mismatch
from project.Errors import Column_Count_Mismatch, Missing_Input_Columns, Column_Indexes_Out_Of_Range, Duplicate_Type_Selector, No_Index_Set_Error, No_Such_Column, No_Input_Columns_Selected, No_Output_Columns, Invalid_Value_Type, Cross_Join_Row_Limit_Exceeded, Row_Count_Mismatch, Existing_Column, Missing_Column
from project.Data.Column import get_item_string
from project.Internal.Filter_Condition_Helpers import make_filter_column
@ -797,8 +797,8 @@ type Table
`filter`.
Arguments:
- column: The column to use for filtering. Can be a column name, index or
the `Column` object itself.
- column: The column to use for filtering.
Can be a column name, index or the `Column` object itself.
- filter: The filter to apply to the column. It can either be an instance
of `Filter_Condition` or a predicate taking a cell value and returning
a boolean value indicating whether the corresponding row should be kept
@ -835,6 +835,8 @@ type Table
people.filter "age" (age -> (age%10 == 0))
@column (self-> Single_Choice display=Display.Always values=(self.column_names.map .pretty))
@filter Filter_Condition.widget_for_filter_condition
@on_problems (Single_Choice ["Report_Error", "Report_Warning", "Ignore"] display=Display.Expanded_Only)
filter : (Column | Text | Integer) -> (Filter_Condition|(Any->Boolean)) -> Problem_Behavior -> Table
filter self column filter=(Filter_Condition.Is_True) on_problems=Report_Warning = case column of
_ : Column ->
@ -844,16 +846,27 @@ type Table
on_problems.handle_errors fallback=self.with_no_rows <|
mask (make_filter_column column filter)
_ : Function -> mask (column.map filter)
_ : Text ->
_ ->
table_at = self.at column
if table_at.is_error.not then self.filter table_at filter on_problems else
expression = self.evaluate column
if expression.is_error.not then self.filter expression filter on_problems else
pick_error = expression.catch Expression_Error.Syntax_Error (_->table_at)
on_problems.handle_errors pick_error fallback=self
_ -> case on_problems.handle_errors (self.at column) fallback=Nothing of
Nothing -> self
resolved_column -> self.filter resolved_column filter on_problems
self.filter table_at filter on_problems
## ALIAS Filter Rows
Selects only the rows of this table for which `expression` evaluates to
`True`.
Arguments:
- expression: The expression to evaluate to filter the rows.
It is turned into a column with `compute`; if that column's value type
is not `Value_Type.Boolean`, an `Illegal_Argument` error is raised.
- on_problems: Specifies how to handle problems. It is passed on both to
the expression evaluation and to the filtering itself.
> Example
Select people celebrating a jubilee.
people.filter_by_expression "[age] % 10 == 0"
filter_by_expression : Text -> Problem_Behavior -> Table
filter_by_expression self expression on_problems=Report_Warning =
column = self.compute expression on_problems=on_problems
if column.value_type != Value_Type.Boolean then Error.throw (Illegal_Argument.Error "Expression must evaluate to a boolean value.") else
self.filter column on_problems=on_problems
## PRIVATE
with_no_rows self = self.take (First 0)
@ -904,17 +917,18 @@ type Table
## ALIAS Add Column, Update Column
Sets the column value at the given name.
Adds or updates the column in the table.
Arguments:
- name: The name of the column to set the value of.
- column: The new value for the column either a `Column` or `Text` of an
expression.
- on_problems: Specifies how to handle if a problem occurs, raising as a
warning by default.
If a column with the given name already exists, it will be replaced.
Otherwise a new column is added.
- column: The new column, or a `Text` expression from which the new column
is computed.
- new_name: Optional new name for the column.
- set_mode: Specifies how to handle the new column.
If `Add_Or_Update`, the column will be added if not present, updated
if present. If `Add`, the column will be added if not present, error
if already present. If `Update`, the column will be updated if
present, error if missing.
- on_problems: Specifies how to handle problems with expression
evaluation.
> Example
Create a table where the values of the total stock in the inventory is
@ -925,24 +939,39 @@ type Table
example_set =
table = Examples.inventory_table
double_inventory = table.at "total_stock" * 2
table.set "total_stock" double_inventory
table.set "total_stock_expr" "2 * [total_stock]"
set : Text -> Column | Vector | Text -> Problem_Behavior -> Table
set self name column on_problems=Report_Warning = case column of
_ : Text ->
expression = self.evaluate column
if expression.is_error.not then self.set name expression on_problems else
on_problems.handle_errors expression fallback=self
_ : Vector -> self.set name (Column.from_vector name column)
_ : Column ->
Table.Value (self.java_table.addOrReplaceColumn (column.rename name . java_column))
table.set double_inventory new_name="total_stock"
table.set "2 * [total_stock]" new_name="total_stock_expr"
set : Column | Text -> Text | Nothing -> Set_Mode -> Problem_Behavior -> Table ! Existing_Column | Missing_Column
set self column new_name=Nothing set_mode=Set_Mode.Add_Or_Update on_problems=Report_Warning =
resolved = case column of
_ : Text -> self.compute column on_problems
_ : Column -> column
renamed = if new_name.is_nothing then resolved else resolved.rename new_name
to_add = case set_mode of
Set_Mode.Add_Or_Update -> True
Set_Mode.Add -> if self.java_table.getColumnByName renamed.name . is_nothing then True else
Error.throw (Existing_Column.Error renamed.name)
Set_Mode.Update -> if self.java_table.getColumnByName renamed.name . is_nothing . not then True else
Error.throw (Missing_Column.Error renamed.name)
## PRIVATE
evaluate : Text -> Column
evaluate self expression =
if to_add then Table.Value (self.java_table.addOrReplaceColumn renamed.java_column) else to_add
## Given an expression, create a derived column where each value is the
result of evaluating the expression for the row.
Arguments:
- expression: The expression to evaluate.
- on_problems: Specifies how to handle problems with expression
evaluation.
compute : Text -> Problem_Behavior -> Column
compute self expression on_problems=Report_Warning =
get_column name = self.at name
make_constant value = Column.from_vector_repeated (UUID.randomUUID.to_text) [value] self.row_count
Expression.evaluate expression get_column make_constant "Standard.Table.Data.Column" "Column" Column.var_args_functions
new_column = Expression.evaluate expression get_column make_constant "Standard.Table.Data.Column" "Column" Column.var_args_functions
if new_column.is_error then new_column else
warnings = Warning.get_all new_column
rename = new_column.rename expression
on_problems.attach_problems_after (Warning.set rename []) warnings
## Returns the vector of columns contained in this table.
@ -1037,6 +1066,7 @@ type Table
allows to join the two tables on equality of corresponding columns with
the same name. So `table.join other on=["A", "B"]` is a shorthand for:
table.join other on=[Join_Condition.Equals "A" "A", Join_Condition.Equals "B" "B"]
@join_kind (Single_Choice ["Join_Kind.Inner", "Join_Kind.Left_Outer", "Join_Kind.Right_Outer", "Join_Kind.Full", "Join_Kind.Left_Exclusive", "Join_Kind.Right_Exclusive"])
join : Table -> Join_Kind -> Join_Condition | Text | Vector (Join_Condition | Text) -> Text -> Problem_Behavior -> Table
join self right join_kind=Join_Kind.Inner on=[Join_Condition.Equals 0 0] right_prefix="Right_" on_problems=Report_Warning =
if check_table "right" right then

View File

@ -65,7 +65,7 @@ type Delimited_Format
for_web content_type _ =
parts = content_type.split ";" . map .trim
charset_part = parts.find if_missing=Nothing (x->x.starts_with "charset=")
charset_part = parts.find if_missing=Nothing (x-> x.starts_with "charset=")
# Fall back to UTF-8 when no `charset=` part was found or its name is not a
# recognised encoding. `drop 8` strips the leading "charset=" prefix.
encoding = if charset_part.is_nothing then Encoding.utf_8 else
    parsed = Encoding.from_name (charset_part.drop 8)
    if parsed.is_error then Encoding.utf_8 else parsed

View File

@ -226,6 +226,20 @@ type Duplicate_Type_Selector
to_display_text : Text
to_display_text self = "Duplicate type selector for column " + self.column + "."
## Indicates that the column was already present in the table.
Arguments:
- column_name: The name of the column that already exists.
type Existing_Column
Error column_name
## Provides a human-readable description of the error, naming the column.
to_display_text : Text
to_display_text self = "The column '" + self.column_name + "' already exists, but `Set_Mode.Add` was selected."
## Indicates that the column was not present in the table.
Arguments:
- column_name: The name of the column that could not be found.
type Missing_Column
Error column_name
## Provides a human-readable description of the error, naming the column.
to_display_text : Text
to_display_text self = "The column '" + self.column_name + "' was not found, but `Set_Mode.Update` was selected."
## Indicates that the target range contains existing data and the user did not
specify to overwrite.
type Existing_Data

View File

@ -32,22 +32,22 @@ make_filter_column source_column filter_condition = case filter_condition of
Greater value -> (source_column > value)
Between lower upper -> source_column.between lower upper
# Text
Starts_With prefix ->
Starts_With prefix case_sensitivity ->
Value_Type.expect_text source_column.value_type <|
expect_column_or_value_as_text "prefix" prefix <|
source_column.starts_with prefix
Ends_With suffix ->
source_column.starts_with prefix case_sensitivity
Ends_With suffix case_sensitivity ->
Value_Type.expect_text source_column.value_type <|
expect_column_or_value_as_text "suffix" suffix <|
source_column.ends_with suffix
Contains substring ->
source_column.ends_with suffix case_sensitivity
Contains substring case_sensitivity ->
Value_Type.expect_text source_column.value_type <|
expect_column_or_value_as_text "substring" substring <|
source_column.contains substring
Not_Contains substring ->
source_column.contains substring case_sensitivity
Not_Contains substring case_sensitivity ->
Value_Type.expect_text source_column.value_type <|
expect_column_or_value_as_text "substring" substring <|
source_column.contains substring . not
source_column.contains substring case_sensitivity . not
Is_Empty ->
Value_Type.expect_text source_column.value_type <|
source_column.is_empty

View File

@ -176,7 +176,7 @@ type Table_Column_Helper
adding a new column with a clashing name will not affect any
other columns computed from the old column with that name.
table_with_blank_indicators = blanks.fold self.table table-> blanks_col->
table.set blanks_col.name blanks_col
table.set blanks_col
just_indicators = table_with_blank_indicators.select_columns (blanks.map .name) on_problems=Problem_Behavior.Report_Error
# Maximum is equivalent to Exists and Minimum is equivalent to Forall.
col_aggregate = if when_any then Maximum _ else Minimum _
@ -212,7 +212,7 @@ type Table_Column_Helper
matched_columns = self.internal_columns.filter column->(column.name==selector)
if matched_columns.length == 1 then matched_columns.first else
if matched_columns.length != 0 then Panic.throw (Illegal_State.Error "A single exact match should never match more than one column. Perhaps the table breaks the invariant of unique column names?") else
expression = (self.table.evaluate selector).catch Any _->Nothing
expression = (self.table.compute selector).catch Any _->Nothing
if Nothing != expression then expression else
problem_builder.report_missing_input_columns [selector]
Nothing

View File

@ -4,12 +4,15 @@ import project.Data.Aggregate_Column.Aggregate_Column
import project.Data.Column.Column
import project.Data.Column_Name_Mapping.Column_Name_Mapping
import project.Data.Column_Selector.Column_Selector
import project.Data.Column_Type_Selection.Auto
import project.Data.Column_Type_Selection.Column_Type_Selection
import project.Data.Data_Formatter.Data_Formatter
import project.Data.Join_Condition.Join_Condition
import project.Data.Join_Kind.Join_Kind
import project.Data.Match_Columns.Match_Columns
import project.Data.Position.Position
import project.Data.Report_Unmatched.Report_Unmatched
import project.Data.Set_Mode.Set_Mode
import project.Data.Sort_Column.Sort_Column
import project.Data.Table.Table
import project.Data.Table_Conversions
@ -23,12 +26,15 @@ export project.Data.Aggregate_Column.Aggregate_Column
export project.Data.Column.Column
export project.Data.Column_Name_Mapping.Column_Name_Mapping
export project.Data.Column_Selector.Column_Selector
export project.Data.Column_Type_Selection.Auto
export project.Data.Column_Type_Selection.Column_Type_Selection
export project.Data.Data_Formatter.Data_Formatter
export project.Data.Join_Condition.Join_Condition
export project.Data.Join_Kind.Join_Kind
export project.Data.Match_Columns.Match_Columns
export project.Data.Position.Position
export project.Data.Report_Unmatched.Report_Unmatched
export project.Data.Set_Mode.Set_Mode
export project.Data.Sort_Column.Sort_Column
export project.Data.Table.Table
export project.Data.Table_Conversions

View File

@ -3,9 +3,7 @@ package org.enso.interpreter.runtime.data.text;
import com.ibm.icu.text.BreakIterator;
import com.ibm.icu.text.Normalizer2;
import com.oracle.truffle.api.CompilerDirectives;
import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary;
import com.oracle.truffle.api.dsl.Cached;
import com.oracle.truffle.api.utilities.TriState;
import com.oracle.truffle.api.interop.InteropLibrary;
import com.oracle.truffle.api.interop.TruffleObject;
import com.oracle.truffle.api.library.CachedLibrary;
@ -197,7 +195,8 @@ public final class Text implements TruffleObject {
String str = toJavaStringNode.execute(this);
// TODO This should be more extensible
String replaced =
str.replace("'", "\\'")
str.replace("\\", "\\\\")
.replace("'", "\\'")
.replace("\n", "\\n")
.replace("\t", "\\t")
.replace("\u0007", "\\a")

View File

@ -120,18 +120,24 @@ spec setup =
((t.at "A").between (t.at "B") (t.at "C")) . to_vector . should_equal r
Test.group prefix+"Column Operations - Text" <|
t3 = table_builder [["s1", ["foobar", "bar", "baz", Nothing]], ["s2", ["foo", "ar", "a", Nothing]]]
t3 = table_builder [["s1", ["foobar", "bar", "baz", "BAB", Nothing]], ["s2", ["foo", "ar", "a", "b", Nothing]]]
s1 = t3.at "s1"
s2 = t3.at "s2"
Test.specify "should handle basic Text operations" <|
s1.starts_with s2 . to_vector . should_equal [True, False, False, Nothing]
s1.starts_with "foo" . to_vector . should_equal [True, False, False, Nothing]
s1.starts_with "ba" . to_vector . should_equal [False, True, True, Nothing]
s1.starts_with s2 . to_vector . should_equal [True, False, False, False, Nothing]
s1.starts_with s2 Case_Sensitivity.Insensitive . to_vector . should_equal [True, False, False, True, Nothing]
s1.starts_with "foo" . to_vector . should_equal [True, False, False, False, Nothing]
s1.starts_with "ba" . to_vector . should_equal [False, True, True, False, Nothing]
s1.starts_with "ba" Case_Sensitivity.Insensitive . to_vector . should_equal [False, True, True, True, Nothing]
s1.contains s2 . to_vector . should_equal [True, True, True, Nothing]
s1.contains "a" . to_vector . should_equal [True, True, True, Nothing]
s1.contains "oo" . to_vector . should_equal [True, False, False, Nothing]
s1.contains s2 . to_vector . should_equal [True, True, True, False, Nothing]
s1.contains s2 Case_Sensitivity.Insensitive . to_vector . should_equal [True, True, True, True, Nothing]
s1.contains "a" . to_vector . should_equal [True, True, True, False, Nothing]
s1.contains "oo" . to_vector . should_equal [True, False, False, False, Nothing]
s1.contains "a" Case_Sensitivity.Insensitive . to_vector . should_equal [True, True, True, True, Nothing]
s1.ends_with s2 . to_vector . should_equal [False, True, False, Nothing]
s1.ends_with "ar" . to_vector . should_equal [True, True, False, Nothing]
s1.ends_with "a" . to_vector . should_equal [False, False, False, Nothing]
s1.ends_with s2 . to_vector . should_equal [False, True, False, False, Nothing]
s1.ends_with s2 Case_Sensitivity.Insensitive . to_vector . should_equal [False, True, False, True, Nothing]
s1.ends_with "ar" . to_vector . should_equal [True, True, False, False, Nothing]
s1.ends_with "a" . to_vector . should_equal [False, False, False, False, Nothing]
s1.ends_with "b" Case_Sensitivity.Insensitive . to_vector . should_equal [False, False, False, True, Nothing]

View File

@ -2,6 +2,7 @@ from Standard.Base import all
import Standard.Base.Error.Common.Index_Out_Of_Bounds
import Standard.Base.Error.Illegal_Argument.Illegal_Argument
from Standard.Table import Set_Mode
from Standard.Table.Errors import all
from Standard.Test import Test
@ -14,7 +15,7 @@ main = run_default_backend spec
spec setup =
prefix = setup.prefix
table_builder = setup.table_builder
table =
table_fn =
col1 = ["foo", [1,2,3]]
col2 = ["bar", [4,5,6]]
col3 = ["Baz", [7,8,9]]
@ -23,6 +24,7 @@ spec setup =
col6 = ["ab.+123", [16,17,18]]
col7 = ["abcd123", [19,20,21]]
table_builder [col1, col2, col3, col4, col5, col6, col7]
table = table_fn
Test.group prefix+"Table.at" <|
Test.specify "should allow selecting columns by name" <|
@ -80,6 +82,42 @@ spec setup =
table.get (Pair.new 1 2) . should_fail_with Illegal_Argument.Error
table.get (Pair.new 1 2) . catch . to_display_text . should_equal "Illegal Argument: expected 'selector' to be either a Text or an Integer, but got Pair.Value."
# Specs for `Table.set`: adding, replacing, `Set_Mode` failures and isolation
# of previously derived columns.
Test.group prefix+"Table.set" <|
    Test.specify "should allow adding a column" <|
        bar2 = table.get "bar" . rename "bar2"
        t2 = table.set bar2
        t2.column_names . should_equal ["foo", "bar", "Baz", "foo_1", "foo_2", "ab.+123", "abcd123", "bar2"]
        t2.get "bar2" . to_vector . should_equal [4, 5, 6]

        # The second positional argument is `new_name`.
        t3 = t2.set bar2 "bar3"
        t3.column_names . should_equal ["foo", "bar", "Baz", "foo_1", "foo_2", "ab.+123", "abcd123", "bar2", "bar3"]

    Test.specify "should allow replacing a column" <|
        foo = table.get "bar" . rename "foo"
        t2 = table.set foo
        t2.column_names . should_equal ["foo", "bar", "Baz", "foo_1", "foo_2", "ab.+123", "abcd123"]
        t2.get "foo" . to_vector . should_equal [4, 5, 6]

        t3 = t2.set foo "bar3"
        t3.column_names . should_equal ["foo", "bar", "Baz", "foo_1", "foo_2", "ab.+123", "abcd123", "bar3"]

    # Previously this spec reused the name "should allow adding a column",
    # although it checks the failure cases of `Set_Mode`.
    Test.specify "should fail if the presence of the column does not match the `set_mode`" <|
        bar2 = table.get "bar" . rename "bar2"
        table.set bar2 set_mode=Set_Mode.Update . should_fail_with Missing_Column.Error

        foo = table.get "bar" . rename "foo"
        table.set foo set_mode=Set_Mode.Add . should_fail_with Existing_Column.Error

    Test.specify "should not affect existing columns that depended on the old column being replaced" <|
        t1 = table_builder [["X", [1,2,3]]]
        t2 = t1.set (t1.at "X" * 100) new_name="Y"
        t3 = t2.set "[X] + 10" new_name="Z"
        t4 = t3.set "[X] + 1000" new_name="X"

        t4.at "X" . to_vector . should_equal [1001, 1002, 1003]
        t4.at "Y" . to_vector . should_equal [100, 200, 300]
        t4.at "Z" . to_vector . should_equal [11, 12, 13]
Test.group prefix+"Table.column_names" <|
Test.specify "should return the names of all columns" <|
table.column_names . should_equal ["foo", "bar", "Baz", "foo_1", "foo_2", "ab.+123", "abcd123"]

View File

@ -30,7 +30,7 @@ spec detailed setup =
epsilon=0.0000000001
tester expression value =
new_table = test_table.set "NEW_COL" expression
new_table = test_table.set expression new_name="NEW_COL"
new_table.column_count . should_equal (test_table.column_count + 1)
new_column = new_table.at -1
new_column.name . should_equal "NEW_COL"
@ -278,8 +278,7 @@ spec detailed setup =
Test.group prefix+"Errors should be handled" pending=pending_bug <|
error_tester expression fail_type =
test_table.set "NEW_COL" expression on_problems=Problem_Behavior.Report_Error . should_fail_with fail_type
test_table.set "NEW_COL" expression . column_count . should_equal test_table.column_count
test_table.set expression new_name="NEW_COL" . should_fail_with fail_type
specify_test "should fail with Syntax_Error if badly formed" expression_test=error_tester expression_test->
expression_test "IIF [A] THEN 1 ELSE 2" Expression_Error.Syntax_Error
@ -293,4 +292,4 @@ spec detailed setup =
expression_test "starts_with([C])" Expression_Error.Argument_Mismatch
specify_test "should fail with Argument_Mismatch if too many arguments" expression_test=error_tester expression_test->
expression_test "starts_with([C], 'Hello', 'World')" Expression_Error.Argument_Mismatch
expression_test "Not([C], 'Hello')" Expression_Error.Argument_Mismatch

View File

@ -1,8 +1,10 @@
from Standard.Base import all
import Standard.Base.Error.Common.Index_Out_Of_Bounds
import Standard.Base.Error.Common.Type_Error
import Standard.Base.Error.Illegal_Argument.Illegal_Argument
import Standard.Table.Data.Value_Type.Value_Type
import Standard.Table.Data.Expression.Expression_Error
from Standard.Table.Errors import all
from Standard.Database.Errors import SQL_Error
@ -78,26 +80,45 @@ spec setup =
t.filter "X" (Filter_Condition.Equal to=(t.at "Y")) . at "X" . to_vector . should_equal ["b", "c"]
t.filter "X" (Filter_Condition.Between (t.at "Y") "bzzzz") . at "X" . to_vector . should_equal ["abb", "baca", "b"]
Test.specify "by text search (contains, starts_with, ends_with, like)" <|
t = table_builder [["ix", [1, 2, 3, 4, 5]], ["X", ["abb", "baca", "banana", Nothing, "nana"]], ["Y", ["a", "b", "d", "c", "a"]], ["Z", ["aaaaa", "bbbbb", "[ab]", "[ab]aaaa", "[ab]ccc"]]]
Test.specify "by text search (contains, starts_with, ends_with, not_contains)" <|
t = table_builder [["ix", [1, 2, 3, 4, 5]], ["X", ["abb", "bacb", "banana", Nothing, "nana"]], ["Y", ["a", "B", "d", "c", "a"]], ["Z", ["aaaaa", "bbbbb", "[ab]", "[ab]aaaa", "[ab]ccc"]]]
t.filter "X" (Filter_Condition.Starts_With "ba") on_problems=Report_Error . at "X" . to_vector . should_equal ["baca", "banana"]
t.filter "X" (Filter_Condition.Starts_With "ba") on_problems=Report_Error . at "X" . to_vector . should_equal ["bacb", "banana"]
t.filter "X" (Filter_Condition.Starts_With "BA" Case_Sensitivity.Sensitive) on_problems=Report_Error . at "X" . to_vector . should_equal []
t.filter "X" (Filter_Condition.Starts_With "BA" Case_Sensitivity.Insensitive) on_problems=Report_Error . at "X" . to_vector . should_equal ["bacb", "banana"]
t.filter "X" (Filter_Condition.Ends_With "na") on_problems=Report_Error . at "X" . to_vector . should_equal ["banana", "nana"]
t.filter "X" (Filter_Condition.Contains "ac") on_problems=Report_Error . at "X" . to_vector . should_equal ["baca"]
t.filter "X" (Filter_Condition.Ends_With "NA" Case_Sensitivity.Sensitive) on_problems=Report_Error . at "X" . to_vector . should_equal []
t.filter "X" (Filter_Condition.Ends_With "NA" Case_Sensitivity.Insensitive) on_problems=Report_Error . at "X" . to_vector . should_equal ["banana", "nana"]
t.filter "X" (Filter_Condition.Contains "ac") on_problems=Report_Error . at "X" . to_vector . should_equal ["bacb"]
t.filter "X" (Filter_Condition.Contains "AC" Case_Sensitivity.Sensitive) on_problems=Report_Error . at "X" . to_vector . should_equal []
t.filter "X" (Filter_Condition.Contains "AC" Case_Sensitivity.Insensitive) on_problems=Report_Error . at "X" . to_vector . should_equal ["bacb"]
t.filter "X" (Filter_Condition.Not_Contains "ac") on_problems=Report_Error . at "X" . to_vector . should_equal ["abb", "banana", "nana"]
t.filter "X" (Filter_Condition.Not_Contains "AC" Case_Sensitivity.Sensitive) on_problems=Report_Error . at "X" . to_vector . should_equal ["abb", "bacb", "banana", "nana"]
t.filter "X" (Filter_Condition.Not_Contains "AC" Case_Sensitivity.Insensitive) on_problems=Report_Error . at "X" . to_vector . should_equal ["abb", "banana", "nana"]
t.filter "X" (Filter_Condition.Starts_With (t.at "Y")) on_problems=Report_Error . at "X" . to_vector . should_equal ["abb", "baca"]
t.filter "X" (Filter_Condition.Starts_With (t.at "Y")) on_problems=Report_Error . at "X" . to_vector . should_equal ["abb"]
t.filter "X" (Filter_Condition.Starts_With (t.at "Y") Case_Sensitivity.Sensitive) on_problems=Report_Error . at "X" . to_vector . should_equal ["abb"]
t.filter "X" (Filter_Condition.Starts_With (t.at "Y") Case_Sensitivity.Insensitive) on_problems=Report_Error . at "X" . to_vector . should_equal ["abb", "bacb"]
t.filter "X" (Filter_Condition.Ends_With (t.at "Y")) on_problems=Report_Error . at "X" . to_vector . should_equal ["nana"]
t.filter "X" (Filter_Condition.Contains (t.at "Y")) on_problems=Report_Error . at "X" . to_vector . should_equal ["abb", "baca", "nana"]
t.filter "X" (Filter_Condition.Not_Contains (t.at "Y")) on_problems=Report_Error . at "X" . to_vector . should_equal ["banana"]
t.filter "X" (Filter_Condition.Ends_With (t.at "Y") Case_Sensitivity.Sensitive) on_problems=Report_Error . at "X" . to_vector . should_equal ["nana"]
t.filter "X" (Filter_Condition.Ends_With (t.at "Y") Case_Sensitivity.Insensitive) on_problems=Report_Error . at "X" . to_vector . should_equal ["bacb", "nana"]
t.filter "X" (Filter_Condition.Contains (t.at "Y")) on_problems=Report_Error . at "X" . to_vector . should_equal ["abb", "nana"]
t.filter "X" (Filter_Condition.Contains (t.at "Y") Case_Sensitivity.Sensitive) on_problems=Report_Error . at "X" . to_vector . should_equal ["abb", "nana"]
t.filter "X" (Filter_Condition.Contains (t.at "Y") Case_Sensitivity.Insensitive) on_problems=Report_Error . at "X" . to_vector . should_equal ["abb", "bacb", "nana"]
t.filter "X" (Filter_Condition.Not_Contains (t.at "Y")) on_problems=Report_Error . at "X" . to_vector . should_equal ["bacb", "banana"]
t.filter "X" (Filter_Condition.Not_Contains (t.at "Y") Case_Sensitivity.Sensitive) on_problems=Report_Error . at "X" . to_vector . should_equal ["bacb", "banana"]
t.filter "X" (Filter_Condition.Not_Contains (t.at "Y") Case_Sensitivity.Insensitive) on_problems=Report_Error . at "X" . to_vector . should_equal ["banana"]
Test.specify "by text search (like, not_like)" <|
t = table_builder [["ix", [1, 2, 3, 4, 5]], ["X", ["abb", "bacb", "banana", Nothing, "nana"]], ["Y", ["a", "B", "d", "c", "a"]], ["Z", ["aaaaa", "bbbbb", "[ab]", "[ab]aaaa", "[ab]ccc"]]]
t.filter "X" (Filter_Condition.Like "%an%") on_problems=Report_Error . at "X" . to_vector . should_equal ["banana", "nana"]
t.filter "X" (Filter_Condition.Like "_a%") on_problems=Report_Error . at "X" . to_vector . should_equal ["baca", "banana", "nana"]
t.filter "X" (Filter_Condition.Like "%b") on_problems=Report_Error . at "X" . to_vector . should_equal ["abb"]
t.filter "X" (Filter_Condition.Like "_a%") on_problems=Report_Error . at "X" . to_vector . should_equal ["bacb", "banana", "nana"]
t.filter "X" (Filter_Condition.Like "%b") on_problems=Report_Error . at "X" . to_vector . should_equal ["abb", "bacb"]
t.filter "X" (Filter_Condition.Like "nana") on_problems=Report_Error . at "X" . to_vector . should_equal ["nana"]
t.filter "Z" (Filter_Condition.Like "[ab]_%") on_problems=Report_Error . at "Z" . to_vector . should_equal ["[ab]aaaa", "[ab]ccc"]
t.filter "X" (Filter_Condition.Not_Like "%b") on_problems=Report_Error . at "X" . to_vector . should_equal ["baca", "banana", "nana"]
t.filter "X" (Filter_Condition.Not_Like "%b") on_problems=Report_Error . at "X" . to_vector . should_equal ["banana", "nana"]
t.filter "Z" (Filter_Condition.Not_Like "[ab]%") on_problems=Report_Error . at "Z" . to_vector . should_equal ["aaaaa", "bbbbb"]
Test.specify "text operations should also match newlines" <|
@ -259,16 +280,29 @@ spec setup =
Test.specify "should handle selection errors: unknown column name" <|
t = table_builder [["X", [10, 20, 13, 4, 5]]]
action = t.filter "unknown column" on_problems=_
tester table =
table.at "X" . to_vector . should_equal (t.at "X" . to_vector)
problems = [No_Such_Column.Error "unknown column"]
Problems.test_problem_handling action problems tester
t.filter "unknown column" . should_fail_with No_Such_Column.Error
t.filter "unknown column" . catch . should_equal (No_Such_Column.Error "unknown column")
Test.specify "should handle selection errors: out of bounds index" <|
t = table_builder [["X", [10, 20, 13, 4, 5]]]
action = t.filter 4 on_problems=_
tester table =
table.at "X" . to_vector . should_equal (t.at "X" . to_vector)
problems = [Index_Out_Of_Bounds.Error 4 1]
Problems.test_problem_handling action problems tester
t.filter 4 . should_fail_with Index_Out_Of_Bounds.Error
t.filter 4 . catch . should_equal (Index_Out_Of_Bounds.Error 4 1)
# Specs for `Table.filter_by_expression`: selecting rows with a text
# expression, and the errors expected for invalid expressions.
Test.group prefix+"Table.filter_by_expression" <|
    Test.specify "by a boolean column" <|
        data = table_builder [["ix", [1, 2, 3, 4, 5]], ["b", [True, False, Nothing, True, True]]]

        kept = data.filter_by_expression "[b]" on_problems=Report_Error
        kept.at "ix" . to_vector . should_equal [1, 4, 5]

        # The row where "b" is Nothing is expected in neither result.
        negated = data.filter_by_expression "![b]" on_problems=Report_Error
        negated.at "ix" . to_vector . should_equal [2]

    Test.specify "by an integer comparison" <|
        data = table_builder [["ix", [1, 2, 3, 4, 5]], ["b", [True, False, Nothing, True, True]]]

        equal_to_three = data.filter_by_expression "[ix]==3" on_problems=Report_Error
        equal_to_three.at "ix" . to_vector . should_equal [3]

        above_two = data.filter_by_expression "[ix]>2" on_problems=Report_Error
        above_two.at "ix" . to_vector . should_equal [3, 4, 5]

    Test.specify "fail gracefully" <|
        data = table_builder [["ix", [1, 2, 3, 4, 5]], ["b", [True, False, Nothing, True, True]]]
        # Unterminated column reference.
        data.filter_by_expression "[ix" . should_fail_with Expression_Error.Syntax_Error
        # Wrong number of arguments passed to a function.
        data.filter_by_expression "NOT(b,False)" . should_fail_with Expression_Error.Argument_Mismatch
        data.filter_by_expression "Starts_With(b)" . should_fail_with Expression_Error.Argument_Mismatch
        # Reference to a column that is not in the table.
        data.filter_by_expression "[missing]" . should_fail_with No_Such_Column.Error
        # "ix" holds integers, so the expression does not yield a boolean column.
        data.filter_by_expression "[ix]" . should_fail_with Illegal_Argument.Error

View File

@ -587,8 +587,8 @@ spec setup =
Test.specify "should work correctly when the join is performed on a transformed table" <|
t1 = table_builder [["X", [1, 2, 3]]]
t1_2 = t1.set "A" "10*[X]+1"
t1_3 = t1.set "B" "[X]+20"
t1_2 = t1.set "10*[X]+1" new_name="A"
t1_3 = t1.set "[X]+20" new_name="B"
t2 = t1_2.join t1_3 on=(Join_Condition.Equals "A" "B")
t2.at "A" . to_vector . should_equal [21]
@ -599,7 +599,7 @@ spec setup =
t4 = table_builder [["X", [1, 2, 3]], ["Y", [10, 20, 30]]]
t5 = table_builder [["X", [5, 7, 1]], ["Z", [100, 200, 300]]]
t4_2 = t4.set "C" "2*[X]+1"
t4_2 = t4.set "2*[X]+1" new_name="C"
t6 = t4_2.join t5 on=(Join_Condition.Equals "C" "X") join_kind=Join_Kind.Inner
expect_column_names ["X", "Y", "C", "Right_X", "Z"] t6
r2 = materialize t6 . order_by ["Y"] . rows . map .to_vector
@ -623,7 +623,7 @@ spec setup =
t4 = table_builder [["X", [1, 2, 3]], ["Y", [10, 20, 30]]]
t5 = table_builder [["X", [5, 7, 1]], ["Z", [100, 200, 300]]]
t4_2 = t4.set "C" "2*[X]+1"
t4_2 = t4.set "2*[X]+1" new_name="C"
t6 = t4_2.join t5 on=(Join_Condition.Equals "C" "X") join_kind=Join_Kind.Full
expect_column_names ["X", "Y", "C", "Right_X", "Z"] t6
r2 = materialize t6 . order_by ["Y"] . rows . map .to_vector
@ -634,7 +634,7 @@ spec setup =
r2.at 3 . should_equal [3, 30, 7, 7, 200]
t4_3 = table_builder [["X", [Nothing, 2, 3]], ["Y", [10, 20, 30]]]
t4_4 = t4_3.set "C" (t4_3.at "X" . fill_missing 7)
t4_4 = t4_3.set (t4_3.at "X" . fill_missing 7) new_name="C"
t7 = t4_4.join t5 on=(Join_Condition.Equals "C" "X") join_kind=Join_Kind.Full
within_table t7 <|
expect_column_names ["X", "Y", "C", "Right_X", "Z"] t7

View File

@ -51,7 +51,7 @@ spec =
foo = t1.at "A" . rename "FOO"
foo.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "FOO" FROM "T1" AS "T1"', []]
t3 = t2.set "bar" foo
t3 = t2.set foo new_name="bar"
t3.to_sql.prepare . should_equal ['SELECT "T1"."C" AS "C", "T1"."B" AS "B", "T1"."A" AS "bar" FROM "T1" AS "T1"', []]
Test.specify "should fail if at is called for a non-existent column" <|
@ -81,8 +81,8 @@ spec =
ends = b.ends_with "suf"
starts = b.starts_with "pref"
contains = b.contains "inf"
ends.to_sql.prepare . should_equal ['SELECT ("T1"."B" LIKE (\'%\' || ?)) AS "B" FROM "T1" AS "T1"', [["suf", str]]]
starts.to_sql.prepare . should_equal ['SELECT ("T1"."B" LIKE (? || \'%\')) AS "B" FROM "T1" AS "T1"', [["pref", str]]]
ends.to_sql.prepare . should_equal ['SELECT ("T1"."B" GLOB (\'*\' || ?)) AS "B" FROM "T1" AS "T1"', [["suf", str]]]
starts.to_sql.prepare . should_equal ['SELECT ("T1"."B" GLOB (? || \'*\')) AS "B" FROM "T1" AS "T1"', [["pref", str]]]
contains.to_sql.prepare . should_equal ['SELECT instr("T1"."B", ?) > 0 AS "B" FROM "T1" AS "T1"', [["inf", str]]]
Test.group "[Codegen] Masking Tables and Columns" <|

View File

@ -298,8 +298,8 @@ connection_setup_spec = Test.group "[PostgreSQL] Connection setup" <|
c2.host . should_equal "localhost"
c2.port . should_equal 5432
c2.database . should_equal ""
c2.jdbc_url . should_equal "jdbc:postgresql://localhost:5432"
c2.database . should_equal "postgres"
c2.jdbc_url . should_equal "jdbc:postgresql://localhost:5432/postgres"
c3.host . should_equal "192.168.0.1"
c3.port . should_equal 1000
@ -312,18 +312,18 @@ connection_setup_spec = Test.group "[PostgreSQL] Connection setup" <|
Postgres
c4.host . should_equal "localhost"
c4.port . should_equal 5432
c4.database . should_equal ""
c4.jdbc_url . should_equal "jdbc:postgresql://localhost:5432"
c4.database . should_equal "postgres"
c4.jdbc_url . should_equal "jdbc:postgresql://localhost:5432/postgres"
add_ssl props = props+[Pair.new 'sslmode' 'prefer']
Test.specify "should use the given credentials" <|
c = Postgres credentials=(Credentials.Username_And_Password "myuser" "mypass")
c.jdbc_url . should_equal "jdbc:postgresql://localhost:5432"
c.jdbc_url . should_equal "jdbc:postgresql://localhost:5432/postgres"
c.jdbc_properties . should_equal <| add_ssl [Pair.new "user" "myuser", Pair.new "password" "mypass"]
Test.specify "should fallback to environment variables and fill-out missing information based on the PGPASS file (if available)" <|
c1 = Postgres
c1.jdbc_url . should_equal "jdbc:postgresql://localhost:5432"
c1.jdbc_url . should_equal "jdbc:postgresql://localhost:5432/postgres"
c1.jdbc_properties . should_equal <| add_ssl []
Test_Environment.unsafe_with_environment_override "PGPASSWORD" "somepassword" <|

View File

@ -1,281 +1,337 @@
from Standard.Base import all
from Standard.Table import Table, Data_Formatter
from Standard.Table import Table, Data_Formatter, Column
from Standard.Table.Data.Column_Type_Selection import Column_Type_Selection, Auto
from Standard.Table.Errors import Invalid_Format, Leading_Zeros, Missing_Input_Columns, Column_Indexes_Out_Of_Range, Duplicate_Type_Selector
from Standard.Test import Test, Test_Suite, Problems
import Standard.Test.Extensions
spec = Test.group "Table.parse_values" <|
Test.specify "should correctly parse integers" <|
t1 = Table.new [["ints", ["0", "+0", "-0", "+1", "-1", "1", "000", "0010", "12345", Nothing]]]
t2 = t1.parse_values column_types=[Column_Type_Selection.Value 0 Integer]
t2.at "ints" . to_vector . should_equal [0, 0, 0, 1, -1, 1, Nothing, Nothing, 12345, Nothing]
spec =
Test.group "Table.parse_values" <|
Test.specify "should correctly parse integers" <|
t1 = Table.new [["ints", ["0", "+0", "-0", "+1", "-1", "1", "000", "0010", "12345", Nothing]]]
t2 = t1.parse_values column_types=[Column_Type_Selection.Value 0 Integer]
t2.at "ints" . to_vector . should_equal [0, 0, 0, 1, -1, 1, Nothing, Nothing, 12345, Nothing]
Test.specify "should correctly parse decimals" <|
t1 = Table.new [["ints", ["0", "+0", "-0", "+1", "-1", "1", "12345", Nothing]]]
t2 = t1.parse_values column_types=[Column_Type_Selection.Value 0 Decimal]
t2.at "ints" . to_vector . should_equal [0, 0, 0, 1, -1, 1, 12345, Nothing]
t2.at "ints" . to_vector . map .to_text . should_equal ["0.0", "0.0", "-0.0", "1.0", "-1.0", "1.0", "12345.0", "Nothing"]
Test.specify "should correctly parse decimals" <|
t1 = Table.new [["ints", ["0", "+0", "-0", "+1", "-1", "1", "12345", Nothing]]]
t2 = t1.parse_values column_types=[Column_Type_Selection.Value 0 Decimal]
t2.at "ints" . to_vector . should_equal [0, 0, 0, 1, -1, 1, 12345, Nothing]
t2.at "ints" . to_vector . map .to_text . should_equal ["0.0", "0.0", "-0.0", "1.0", "-1.0", "1.0", "12345.0", "Nothing"]
t3 = Table.new [["floats", ["0.0", "+0.0", "-0.0", "+1.0", "-1.0", "1.0", "0.0000", "10.", "12345."]]]
t4 = t3.parse_values column_types=[Column_Type_Selection.Value 0 Decimal]
t4.at "floats" . to_vector . should_equal [0, 0, 0, 1, -1, 1, 0, 10, 12345]
t3 = Table.new [["floats", ["0.0", "+0.0", "-0.0", "+1.0", "-1.0", "1.0", "0.0000", "10.", "12345."]]]
t4 = t3.parse_values column_types=[Column_Type_Selection.Value 0 Decimal]
t4.at "floats" . to_vector . should_equal [0, 0, 0, 1, -1, 1, 0, 10, 12345]
t5 = Table.new [["floats", [".0", "0.", "1.", ".1", ".123", "-.1", "+.1", "+0.0", "0.1234", Nothing, "11111111.111"]]]
t6 = t5.parse_values column_types=[Column_Type_Selection.Value 0 Decimal]
t6.at "floats" . to_vector . should_equal [0.0, 0.0, 1.0, 0.1, 0.123, -0.1, 0.1, 0.0, 0.1234, Nothing, 11111111.111]
t5 = Table.new [["floats", [".0", "0.", "1.", ".1", ".123", "-.1", "+.1", "+0.0", "0.1234", Nothing, "11111111.111"]]]
t6 = t5.parse_values column_types=[Column_Type_Selection.Value 0 Decimal]
t6.at "floats" . to_vector . should_equal [0.0, 0.0, 1.0, 0.1, 0.123, -0.1, 0.1, 0.0, 0.1234, Nothing, 11111111.111]
Test.specify "should warn on leading zeros in numbers, if asked" <|
t1 = Table.new [["ints", ["0", "+00", "-00", "+01", "-01", "01", "000", "0010", "12345", Nothing]]]
t2 = Table.new [["floats", ["0.0000", ".0", "00.", "01.0", "-0010.0000", "1.0000"]]]
Test.specify "should warn on leading zeros in numbers, if asked" <|
t1 = Table.new [["ints", ["0", "+00", "-00", "+01", "-01", "01", "000", "0010", "12345", Nothing]]]
t2 = Table.new [["floats", ["0.0000", ".0", "00.", "01.0", "-0010.0000", "1.0000"]]]
t1_parsed = [0, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, 12345, Nothing]
t1_zeros = ["+00", "-00", "+01", "-01", "01", "000", "0010"]
t3 = t1.parse_values column_types=[Column_Type_Selection.Value 0 Integer]
t3.at "ints" . to_vector . should_equal t1_parsed
Problems.get_attached_warnings t3 . should_equal [Leading_Zeros.Error "ints" Integer t1_zeros]
t1_parsed = [0, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, 12345, Nothing]
t1_zeros = ["+00", "-00", "+01", "-01", "01", "000", "0010"]
t3 = t1.parse_values column_types=[Column_Type_Selection.Value 0 Integer]
t3.at "ints" . to_vector . should_equal t1_parsed
Problems.get_attached_warnings t3 . should_equal [Leading_Zeros.Error "ints" Integer t1_zeros]
t4 = t1.parse_values column_types=[Column_Type_Selection.Value 0 Decimal]
t4.at "ints" . to_vector . should_equal t1_parsed
Problems.get_attached_warnings t4 . should_equal [Leading_Zeros.Error "ints" Decimal t1_zeros]
t4 = t1.parse_values column_types=[Column_Type_Selection.Value 0 Decimal]
t4.at "ints" . to_vector . should_equal t1_parsed
Problems.get_attached_warnings t4 . should_equal [Leading_Zeros.Error "ints" Decimal t1_zeros]
t5 = t2.parse_values column_types=[Column_Type_Selection.Value 0 Decimal]
t5.at "floats" . to_vector . should_equal [0.0, 0.0, Nothing, Nothing, Nothing, 1.0]
Problems.get_attached_warnings t5 . should_equal [Leading_Zeros.Error "floats" Decimal ["00.", "01.0", '-0010.0000']]
t5 = t2.parse_values column_types=[Column_Type_Selection.Value 0 Decimal]
t5.at "floats" . to_vector . should_equal [0.0, 0.0, Nothing, Nothing, Nothing, 1.0]
Problems.get_attached_warnings t5 . should_equal [Leading_Zeros.Error "floats" Decimal ["00.", "01.0", '-0010.0000']]
opts = Data_Formatter.Value allow_leading_zeros=True
t1_parsed_zeros = [0, 0, 0, 1, -1, 1, 0, 10, 12345, Nothing]
t6 = t1.parse_values value_formatter=opts column_types=[Column_Type_Selection.Value 0 Integer]
t6.at "ints" . to_vector . should_equal t1_parsed_zeros
Warning.get_all t6 . should_equal []
opts = Data_Formatter.Value allow_leading_zeros=True
t1_parsed_zeros = [0, 0, 0, 1, -1, 1, 0, 10, 12345, Nothing]
t6 = t1.parse_values value_formatter=opts column_types=[Column_Type_Selection.Value 0 Integer]
t6.at "ints" . to_vector . should_equal t1_parsed_zeros
Warning.get_all t6 . should_equal []
t7 = t1.parse_values value_formatter=opts column_types=[Column_Type_Selection.Value 0 Decimal]
t7.at "ints" . to_vector . should_equal t1_parsed_zeros
Warning.get_all t7 . should_equal []
t8 = t2.parse_values value_formatter=opts column_types=[Column_Type_Selection.Value 0 Decimal]
t8.at "floats" . to_vector . should_equal [0.0, 0.0, 0.0, 1.0, -10.0, 1.0]
Warning.get_all t8 . should_equal []
t7 = t1.parse_values value_formatter=opts column_types=[Column_Type_Selection.Value 0 Decimal]
t7.at "ints" . to_vector . should_equal t1_parsed_zeros
Warning.get_all t7 . should_equal []
t8 = t2.parse_values value_formatter=opts column_types=[Column_Type_Selection.Value 0 Decimal]
t8.at "floats" . to_vector . should_equal [0.0, 0.0, 0.0, 1.0, -10.0, 1.0]
Warning.get_all t8 . should_equal []
Test.specify "should correctly parse booleans" <|
t1 = Table.new [["bools", ["true", "false", "True", "TRUE", "FALSE", Nothing, "False"]]]
t2 = t1.parse_values column_types=[Column_Type_Selection.Value 0 Boolean]
t2.at "bools" . to_vector . should_equal [True, False, True, True, False, Nothing, False]
Test.specify "should correctly parse booleans" <|
t1 = Table.new [["bools", ["true", "false", "True", "TRUE", "FALSE", Nothing, "False"]]]
t2 = t1.parse_values column_types=[Column_Type_Selection.Value 0 Boolean]
t2.at "bools" . to_vector . should_equal [True, False, True, True, False, Nothing, False]
Test.specify "should correctly parse date and time" <|
t1 = Table.new [["dates", ["2022-05-07", "2000-01-01", "2010-12-31"]]]
t2 = t1.parse_values column_types=[Column_Type_Selection.Value 0 Date]
t2.at "dates" . to_vector . should_equal [Date.new 2022 5 7, Date.new 2000 1 1, Date.new 2010 12 31]
Test.specify "should correctly parse date and time" <|
t1 = Table.new [["dates", ["2022-05-07", "2000-01-01", "2010-12-31"]]]
t2 = t1.parse_values column_types=[Column_Type_Selection.Value 0 Date]
t2.at "dates" . to_vector . should_equal [Date.new 2022 5 7, Date.new 2000 1 1, Date.new 2010 12 31]
t3 = Table.new [["datetimes", ["2022-05-07 23:59:59", "2000-01-01 00:00:00", "2010-12-31 12:34:56"]]]
t4 = t3.parse_values column_types=[Column_Type_Selection.Value 0 Date_Time]
t4.at "datetimes" . to_vector . should_equal [Date_Time.new 2022 5 7 23 59 59, Date_Time.new 2000 1 1, Date_Time.new 2010 12 31 12 34 56]
t3 = Table.new [["datetimes", ["2022-05-07 23:59:59", "2000-01-01 00:00:00", "2010-12-31 12:34:56"]]]
t4 = t3.parse_values column_types=[Column_Type_Selection.Value 0 Date_Time]
t4.at "datetimes" . to_vector . should_equal [Date_Time.new 2022 5 7 23 59 59, Date_Time.new 2000 1 1, Date_Time.new 2010 12 31 12 34 56]
t5 = Table.new [["times", ["23:59:59", "00:00:00", "12:34:56"]]]
t6 = t5.parse_values column_types=[Column_Type_Selection.Value 0 Time_Of_Day]
t6.at "times" . to_vector . should_equal [Time_Of_Day.new 23 59 59, Time_Of_Day.new, Time_Of_Day.new 12 34 56]
t5 = Table.new [["times", ["23:59:59", "00:00:00", "12:34:56"]]]
t6 = t5.parse_values column_types=[Column_Type_Selection.Value 0 Time_Of_Day]
t6.at "times" . to_vector . should_equal [Time_Of_Day.new 23 59 59, Time_Of_Day.new, Time_Of_Day.new 12 34 56]
Test.specify "should parse date and time in various formats" <|
opts = Data_Formatter.Value date_formats=["d.M.y", "d MMM y[ G]", "E, d MMM y"] datetime_formats=["yyyy-MM-dd'T'HH:mm:ss", "dd/MM/yyyy HH:mm"] time_formats=["H:mm:ss.n", "h:mma"]
Test.specify "should parse date and time in various formats" <|
opts = Data_Formatter.Value date_formats=["d.M.y", "d MMM y[ G]", "E, d MMM y"] datetime_formats=["yyyy-MM-dd'T'HH:mm:ss", "dd/MM/yyyy HH:mm"] time_formats=["H:mm:ss.n", "h:mma"]
t1 = Table.new [["dates", ["1.2.476", "10 Jan 1900 AD", "Tue, 3 Jun 2008"]]]
t2 = t1.parse_values opts column_types=[Column_Type_Selection.Value 0 Date]
t2.at "dates" . to_vector . should_equal [Date.new 476 2 1, Date.new 1900 1 10, Date.new 2008 6 3]
t1 = Table.new [["dates", ["1.2.476", "10 Jan 1900 AD", "Tue, 3 Jun 2008"]]]
t2 = t1.parse_values opts column_types=[Column_Type_Selection.Value 0 Date]
t2.at "dates" . to_vector . should_equal [Date.new 476 2 1, Date.new 1900 1 10, Date.new 2008 6 3]
t3 = Table.new [["datetimes", ["2011-12-03T10:15:30", "31/12/2012 22:33"]]]
t4 = t3.parse_values opts column_types=[Column_Type_Selection.Value 0 Date_Time]
t4.at "datetimes" . to_vector . should_equal [Date_Time.new 2011 12 3 10 15 30, Date_Time.new 2012 12 31 22 33]
t3 = Table.new [["datetimes", ["2011-12-03T10:15:30", "31/12/2012 22:33"]]]
t4 = t3.parse_values opts column_types=[Column_Type_Selection.Value 0 Date_Time]
t4.at "datetimes" . to_vector . should_equal [Date_Time.new 2011 12 3 10 15 30, Date_Time.new 2012 12 31 22 33]
t5 = Table.new [["times", ["1:02:03.987654321", "1:30PM"]]]
t6 = t5.parse_values opts column_types=[Column_Type_Selection.Value 0 Time_Of_Day]
t6.at "times" . to_vector . should_equal [Time_Of_Day.new 1 2 3 nanosecond=987654321, Time_Of_Day.new 13 30 0 0]
t5 = Table.new [["times", ["1:02:03.987654321", "1:30PM"]]]
t6 = t5.parse_values opts column_types=[Column_Type_Selection.Value 0 Time_Of_Day]
t6.at "times" . to_vector . should_equal [Time_Of_Day.new 1 2 3 nanosecond=987654321, Time_Of_Day.new 13 30 0 0]
Test.specify "should warn when cells do not fit the expected format" <|
ints = ["ints", ["0", "1", "1.0", "foobar", "", "--1", "+-1", "10", "-+1"]]
# Currently scientific notation is not supported and we document that in this test, in the future the situation may change and the test may need to be flipped.
floats = ["floats", ["0", "2.0", "1e6", "foobar", "", "--1", "+-1", "100.", "-+1"]]
bools = ["bools", ["True", "false", "fAlSE", "foobar", "", "0", "1", "true", "truefalse"]]
time_content = ["2001-01-01", "2001-01-01 12:34:56", "10:00:10", "Tuesday", "foobar", "", "10:99:99", "1/2/2003", "2001-30-10"]
t = Table.new [ints, floats, bools, ["dates", time_content], ["datetimes", time_content], ["times", time_content]]
Test.specify "should warn when cells do not fit the expected format" <|
ints = ["ints", ["0", "1", "1.0", "foobar", "", "--1", "+-1", "10", "-+1"]]
# Currently scientific notation is not supported and we document that in this test, in the future the situation may change and the test may need to be flipped.
floats = ["floats", ["0", "2.0", "1e6", "foobar", "", "--1", "+-1", "100.", "-+1"]]
bools = ["bools", ["True", "false", "fAlSE", "foobar", "", "0", "1", "true", "truefalse"]]
time_content = ["2001-01-01", "2001-01-01 12:34:56", "10:00:10", "Tuesday", "foobar", "", "10:99:99", "1/2/2003", "2001-30-10"]
t = Table.new [ints, floats, bools, ["dates", time_content], ["datetimes", time_content], ["times", time_content]]
action pb =
t.parse_values column_types=[Column_Type_Selection.Value 0 Integer, Column_Type_Selection.Value "floats" Decimal, Column_Type_Selection.Value "bools" Boolean, Column_Type_Selection.Value "dates" Date, Column_Type_Selection.Value "datetimes" Date_Time, Column_Type_Selection.Value "times" Time_Of_Day] on_problems=pb
tester t =
t.at "ints" . to_vector . should_equal [0, 1, Nothing, Nothing, Nothing, Nothing, Nothing, 10, Nothing]
t.at "floats" . to_vector . should_equal [0, 2, Nothing, Nothing, Nothing, Nothing, Nothing, 100, Nothing]
t.at "bools" . to_vector . should_equal [True, False, Nothing, Nothing, Nothing, Nothing, Nothing, True, Nothing]
t.at "dates" . to_vector . should_equal [Date.new 2001 1 1, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing]
t.at "datetimes" . to_vector . should_equal [Nothing, Date_Time.new 2001 1 1 12 34 56, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing]
t.at "times" . to_vector . should_equal [Nothing, Nothing, Time_Of_Day.new 10 0 10, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing]
problems =
ints = Invalid_Format.Error "ints" Integer ["1.0", "foobar", "", "--1", "+-1", "-+1"]
floats = Invalid_Format.Error "floats" Decimal ["1e6", "foobar", "", "--1", "+-1", "-+1"]
bools = Invalid_Format.Error "bools" Boolean ["fAlSE", "foobar", "", "0", "1", "truefalse"]
dates = Invalid_Format.Error "dates" Date ["2001-01-01 12:34:56", "10:00:10", "Tuesday", "foobar", "", "10:99:99", "1/2/2003", "2001-30-10"]
datetimes = Invalid_Format.Error "datetimes" Date_Time ["2001-01-01", "10:00:10", "Tuesday", "foobar", "", "10:99:99", "1/2/2003", "2001-30-10"]
times = Invalid_Format.Error "times" Time_Of_Day ["2001-01-01", "2001-01-01 12:34:56", "Tuesday", "foobar", "", "10:99:99", "1/2/2003", "2001-30-10"]
[ints, floats, bools, dates, datetimes, times]
Problems.test_problem_handling action problems tester
action pb =
t.parse_values column_types=[Column_Type_Selection.Value 0 Integer, Column_Type_Selection.Value "floats" Decimal, Column_Type_Selection.Value "bools" Boolean, Column_Type_Selection.Value "dates" Date, Column_Type_Selection.Value "datetimes" Date_Time, Column_Type_Selection.Value "times" Time_Of_Day] on_problems=pb
tester t =
t.at "ints" . to_vector . should_equal [0, 1, Nothing, Nothing, Nothing, Nothing, Nothing, 10, Nothing]
t.at "floats" . to_vector . should_equal [0, 2, Nothing, Nothing, Nothing, Nothing, Nothing, 100, Nothing]
t.at "bools" . to_vector . should_equal [True, False, Nothing, Nothing, Nothing, Nothing, Nothing, True, Nothing]
t.at "dates" . to_vector . should_equal [Date.new 2001 1 1, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing]
t.at "datetimes" . to_vector . should_equal [Nothing, Date_Time.new 2001 1 1 12 34 56, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing]
t.at "times" . to_vector . should_equal [Nothing, Nothing, Time_Of_Day.new 10 0 10, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing]
problems =
ints = Invalid_Format.Error "ints" Integer ["1.0", "foobar", "", "--1", "+-1", "-+1"]
floats = Invalid_Format.Error "floats" Decimal ["1e6", "foobar", "", "--1", "+-1", "-+1"]
bools = Invalid_Format.Error "bools" Boolean ["fAlSE", "foobar", "", "0", "1", "truefalse"]
dates = Invalid_Format.Error "dates" Date ["2001-01-01 12:34:56", "10:00:10", "Tuesday", "foobar", "", "10:99:99", "1/2/2003", "2001-30-10"]
datetimes = Invalid_Format.Error "datetimes" Date_Time ["2001-01-01", "10:00:10", "Tuesday", "foobar", "", "10:99:99", "1/2/2003", "2001-30-10"]
times = Invalid_Format.Error "times" Time_Of_Day ["2001-01-01", "2001-01-01 12:34:56", "Tuesday", "foobar", "", "10:99:99", "1/2/2003", "2001-30-10"]
[ints, floats, bools, dates, datetimes, times]
Problems.test_problem_handling action problems tester
Test.specify "should leave not selected columns unaffected" <|
t1 = Table.new [["A", ["1", "2"]], ["B", ["3", "4"]]]
t2 = t1.parse_values column_types=[Column_Type_Selection.Value "B" Integer]
t2.at "A" . to_vector . should_equal ["1", "2"]
t2.at "B" . to_vector . should_equal [3, 4]
Test.specify "should leave not selected columns unaffected" <|
t1 = Table.new [["A", ["1", "2"]], ["B", ["3", "4"]]]
t2 = t1.parse_values column_types=[Column_Type_Selection.Value "B" Integer]
t2.at "A" . to_vector . should_equal ["1", "2"]
t2.at "B" . to_vector . should_equal [3, 4]
# Selectors naming columns that do not exist ("bars", "foos") or indexes outside
# the table (100, -100) are expected to be reported as problems, while the valid
# selectors (-1 and "floats") are still applied.
Test.specify "should report missing columns in Column_Type_Selection" <|
    source = Table.new [["floats", ["1.0"]], ["ints", ["1"]], ["bools", ["True"]]]
    selectors = [Column_Type_Selection.Value -1 Boolean, Column_Type_Selection.Value "floats" Decimal, Column_Type_Selection.Value "bars" Boolean, Column_Type_Selection.Value "foos" Boolean, Column_Type_Selection.Value 100 Boolean, Column_Type_Selection.Value -100 Boolean]
    expected_problems = [Missing_Input_Columns.Error ["bars", "foos"], Column_Indexes_Out_Of_Range.Error [100, -100]]
    run_parse problem_behavior =
        source.parse_values column_types=selectors on_problems=problem_behavior
    check parsed =
        parsed.at "floats" . to_vector . should_equal [1.0]
        # "ints" has no selector, so it stays as text.
        parsed.at "ints" . to_vector . should_equal ["1"]
        parsed.at "bools" . to_vector . should_equal [True]
    Problems.test_problem_handling run_parse expected_problems check
# Selectors naming columns that do not exist ("bars", "foos") or indexes outside
# the table (100, -100) are expected to be reported as problems, while the valid
# selectors (-1 and "floats") are still applied.
Test.specify "should report missing columns in Column_Type_Selection" <|
    source = Table.new [["floats", ["1.0"]], ["ints", ["1"]], ["bools", ["True"]]]
    selectors = [Column_Type_Selection.Value -1 Boolean, Column_Type_Selection.Value "floats" Decimal, Column_Type_Selection.Value "bars" Boolean, Column_Type_Selection.Value "foos" Boolean, Column_Type_Selection.Value 100 Boolean, Column_Type_Selection.Value -100 Boolean]
    expected_problems = [Missing_Input_Columns.Error ["bars", "foos"], Column_Indexes_Out_Of_Range.Error [100, -100]]
    run_parse problem_behavior =
        source.parse_values column_types=selectors on_problems=problem_behavior
    check parsed =
        parsed.at "floats" . to_vector . should_equal [1.0]
        # "ints" has no selector, so it stays as text.
        parsed.at "ints" . to_vector . should_equal ["1"]
        parsed.at "bools" . to_vector . should_equal [True]
    Problems.test_problem_handling run_parse expected_problems check
# Two kinds of duplicate selector are exercised:
#  - index -2 and "bools" name the same column with the same type
#    (reported with ambiguous=False, column still parsed);
#  - "floats" is selected twice with different types
#    (reported with ambiguous=True, column expected to stay as text).
Test.specify "should report duplicate columns in Column_Type_Selection" <|
    source = Table.new [["floats", ["1.0"]], ["ints", ["1"]], ["bools", ["True"]], ["foo", ["42"]]]
    selectors = [Column_Type_Selection.Value -2 Boolean, Column_Type_Selection.Value "bools" Boolean, Column_Type_Selection.Value "floats" Decimal, Column_Type_Selection.Value "floats" Integer, Column_Type_Selection.Value "ints" Integer]
    expected_problems = [(Duplicate_Type_Selector.Error "floats" ambiguous=True), (Duplicate_Type_Selector.Error "bools" ambiguous=False)]
    run_parse problem_behavior =
        source.parse_values column_types=selectors on_problems=problem_behavior
    check parsed =
        parsed.at "floats" . to_vector . should_equal ["1.0"]
        parsed.at "ints" . to_vector . should_equal [1]
        parsed.at "bools" . to_vector . should_equal [True]
        parsed.at "foo" . to_vector . should_equal ["42"]
    Problems.test_problem_handling run_parse expected_problems check
# Two kinds of duplicate selector are exercised:
#  - index -2 and "bools" name the same column with the same type
#    (reported with ambiguous=False, column still parsed);
#  - "floats" is selected twice with different types
#    (reported with ambiguous=True, column expected to stay as text).
Test.specify "should report duplicate columns in Column_Type_Selection" <|
    source = Table.new [["floats", ["1.0"]], ["ints", ["1"]], ["bools", ["True"]], ["foo", ["42"]]]
    selectors = [Column_Type_Selection.Value -2 Boolean, Column_Type_Selection.Value "bools" Boolean, Column_Type_Selection.Value "floats" Decimal, Column_Type_Selection.Value "floats" Integer, Column_Type_Selection.Value "ints" Integer]
    expected_problems = [(Duplicate_Type_Selector.Error "floats" ambiguous=True), (Duplicate_Type_Selector.Error "bools" ambiguous=False)]
    run_parse problem_behavior =
        source.parse_values column_types=selectors on_problems=problem_behavior
    check parsed =
        parsed.at "floats" . to_vector . should_equal ["1.0"]
        parsed.at "ints" . to_vector . should_equal [1]
        parsed.at "bools" . to_vector . should_equal [True]
        parsed.at "foo" . to_vector . should_equal ["42"]
    Problems.test_problem_handling run_parse expected_problems check
# Verifies type auto-detection: `parse_values` is called without `column_types`
# and every column is expected to come back as the single type its values fit,
# or as text when they do not fit one type (e.g. "mixeddates", "text+ints").
# NOTE(review): each run of statements below appears twice in a row (the
# `c1`..`t2` setup, the `t2` assertions, the `t3`, `t4` and `t5` sections). This
# looks like the removed and re-added sides of a re-indentation diff rendered
# without +/- markers - confirm against the real file before treating the
# repeats as intentional.
Test.specify "should guess the datatype for columns" <|
c1 = ["ints", ["1", " +2", "-123", Nothing]]
c2 = ["ints0", ["01", "02 ", Nothing, "-1"]]
c3 = ["floats", [" 1.0 ", "2.2", Nothing, "-1.0"]]
c4 = ["bools", ["true", " False", Nothing, "True"]]
c5 = ["floats+ints", ["1", "2.2 ", "-1.0", Nothing]]
c6 = ["text", ["foobar", "foo", "", Nothing]]
c7 = ["dates", ["2022-10-01", " 2000-01-01", "1999-01-02", Nothing]]
c8 = ["datetimes", ["2022-10-01 01:02:03 ", "2000-01-01 01:02:03", "1999-01-02 01:02:03", Nothing]]
c9 = ["times", ["01:02:03", " 00:00:00 ", "01:02:03", Nothing]]
c10 = ["mixeddates", ["2022-10-01", "2000-01-01 01:02:03", "01:02:03", Nothing]]
c11 = ["text+ints", ["1", "2", " foobar", Nothing]]
t = Table.new [c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11]
t2 = t.parse_values
Test.specify "should guess the datatype for columns" <|
c1 = ["ints", ["1", " +2", "-123", Nothing]]
c2 = ["ints0", ["01", "02 ", Nothing, "-1"]]
c3 = ["floats", [" 1.0 ", "2.2", Nothing, "-1.0"]]
c4 = ["bools", ["true", " False", Nothing, "True"]]
c5 = ["floats+ints", ["1", "2.2 ", "-1.0", Nothing]]
c6 = ["text", ["foobar", "foo", "", Nothing]]
c7 = ["dates", ["2022-10-01", " 2000-01-01", "1999-01-02", Nothing]]
c8 = ["datetimes", ["2022-10-01 01:02:03 ", "2000-01-01 01:02:03", "1999-01-02 01:02:03", Nothing]]
c9 = ["times", ["01:02:03", " 00:00:00 ", "01:02:03", Nothing]]
c10 = ["mixeddates", ["2022-10-01", "2000-01-01 01:02:03", "01:02:03", Nothing]]
c11 = ["text+ints", ["1", "2", " foobar", Nothing]]
t = Table.new [c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11]
t2 = t.parse_values
# Auto-detection is expected to attach no warnings.
Warning.get_all t2 . should_equal []
t2.at "ints" . to_vector . should_equal [1, 2, -123, Nothing]
t2.at "ints" . to_vector . first . should_be_an Integer
# "ints0" has leading zeros; by default it is expected to stay as (trimmed) text.
t2.at "ints0" . to_vector . should_equal ["01", "02", Nothing, "-1"]
t2.at "floats" . to_vector . should_equal [1.0, 2.2, Nothing, -1.0]
t2.at "bools" . to_vector . should_equal [True, False, Nothing, True]
t2.at "floats+ints" . to_vector . should_equal [1.0, 2.2, -1.0, Nothing]
t2.at "text" . to_vector . should_equal ["foobar", "foo", "", Nothing]
t2.at "dates" . to_vector . should_equal [Date.new 2022 10 1, Date.new 2000 1 1, Date.new 1999 1 2, Nothing]
t2.at "datetimes" . to_vector . should_equal [Date_Time.new 2022 10 1 1 2 3, Date_Time.new 2000 1 1 1 2 3, Date_Time.new 1999 1 2 1 2 3, Nothing]
t2.at "times" . to_vector . should_equal [Time_Of_Day.new 1 2 3, Time_Of_Day.new, Time_Of_Day.new 1 2 3, Nothing]
t2.at "mixeddates" . to_vector . should_equal ["2022-10-01", "2000-01-01 01:02:03", "01:02:03", Nothing]
t2.at "text+ints" . to_vector . should_equal ["1", "2", "foobar", Nothing]
Warning.get_all t2 . should_equal []
t2.at "ints" . to_vector . should_equal [1, 2, -123, Nothing]
t2.at "ints" . to_vector . first . should_be_an Integer
t2.at "ints0" . to_vector . should_equal ["01", "02", Nothing, "-1"]
t2.at "floats" . to_vector . should_equal [1.0, 2.2, Nothing, -1.0]
t2.at "bools" . to_vector . should_equal [True, False, Nothing, True]
t2.at "floats+ints" . to_vector . should_equal [1.0, 2.2, -1.0, Nothing]
t2.at "text" . to_vector . should_equal ["foobar", "foo", "", Nothing]
t2.at "dates" . to_vector . should_equal [Date.new 2022 10 1, Date.new 2000 1 1, Date.new 1999 1 2, Nothing]
t2.at "datetimes" . to_vector . should_equal [Date_Time.new 2022 10 1 1 2 3, Date_Time.new 2000 1 1 1 2 3, Date_Time.new 1999 1 2 1 2 3, Nothing]
t2.at "times" . to_vector . should_equal [Time_Of_Day.new 1 2 3, Time_Of_Day.new, Time_Of_Day.new 1 2 3, Nothing]
t2.at "mixeddates" . to_vector . should_equal ["2022-10-01", "2000-01-01 01:02:03", "01:02:03", Nothing]
t2.at "text+ints" . to_vector . should_equal ["1", "2", "foobar", Nothing]
# Custom true/false values: the column that also contains "True" is expected to
# be detected as booleans, while the column holding only "1"/"0" is still
# expected to be detected as integers.
t3 = Table.new [["bools", ["1", "0", "True"]], ["ints", ["1", "0", "0"]]] . parse_values (Data_Formatter.Value true_values=["1", "True"] false_values=["0", "False"])
t3.at "bools" . to_vector . should_equal [True, False, True]
t3.at "ints" . to_vector . should_equal [1, 0, 0]
t3 = Table.new [["bools", ["1", "0", "True"]], ["ints", ["1", "0", "0"]]] . parse_values (Data_Formatter.Value true_values=["1", "True"] false_values=["0", "False"])
t3.at "bools" . to_vector . should_equal [True, False, True]
t3.at "ints" . to_vector . should_equal [1, 0, 0]
# With allow_leading_zeros=True the "ints0" column is expected to parse as integers.
t4 = Table.new [c2] . parse_values (Data_Formatter.Value allow_leading_zeros=True)
t4 . at "ints0" . to_vector . should_equal [1, 2, Nothing, -1]
t4 = Table.new [c2] . parse_values (Data_Formatter.Value allow_leading_zeros=True)
t4 . at "ints0" . to_vector . should_equal [1, 2, Nothing, -1]
# Mixing an explicit type with `Auto` selectors on the same table.
t5 = t.parse_values column_types=[Column_Type_Selection.Value "ints" Decimal, Column_Type_Selection.Value "floats" Auto, Column_Type_Selection.Value "text+ints" Auto]
t5.at "ints" . to_vector . should_equal [1.0, 2.0, -123.0, Nothing]
# `ints` are requested to be parsed as decimals.
t5.at "ints" . to_vector . first . should_be_a Decimal
# `floats` are auto-detected as decimals.
t5.at "floats" . to_vector . should_equal [1.0, 2.2, Nothing, -1.0]
# `text+ints` is attempted to be parsed (hence whitespace is stripped), but it only fits the text type.
t5.at "text+ints" . to_vector . should_equal ["1", "2", "foobar", Nothing]
# `bools` are not requested to be parsed, so they are kept as-is, with leading whitespace etc.
t5.at "bools" . to_vector . should_equal ["true", " False", Nothing, "True"]
t5 = t.parse_values column_types=[Column_Type_Selection.Value "ints" Decimal, Column_Type_Selection.Value "floats" Auto, Column_Type_Selection.Value "text+ints" Auto]
t5.at "ints" . to_vector . should_equal [1.0, 2.0, -123.0, Nothing]
# `ints` are requested to be parsed as decimals.
t5.at "ints" . to_vector . first . should_be_a Decimal
# `floats` are auto-detected as decimals.
t5.at "floats" . to_vector . should_equal [1.0, 2.2, Nothing, -1.0]
# `text+ints` is attempted to be parsed (hence whitespace is stripped), but it only fits the text type.
t5.at "text+ints" . to_vector . should_equal ["1", "2", "foobar", Nothing]
# `bools` are not requested to be parsed, so they are kept as-is, with leading whitespace etc.
t5.at "bools" . to_vector . should_equal ["true", " False", Nothing, "True"]
# Parses numbers with ',' as the decimal point and '_' as the thousands
# separator, then checks which inputs are rejected (Nothing + Invalid_Format
# warning) for Decimal and for Integer.
# NOTE(review): the header/`t1`/`t2` run and the `t3`..`t5` run each appear twice
# in a row - this looks like both sides of a re-indentation diff rendered
# without +/- markers; confirm against the real file.
Test.specify "should allow to specify a thousands separator and a custom decimal point" <|
opts = Data_Formatter.Value decimal_point=',' thousand_separator='_'
t1 = Table.new [["floats", ["0,0", "+0,0", "-0,0", "+1,5", "-1,2", "1,0", "0,0000", "10_000,", ",0"]]]
t2 = t1.parse_values opts column_types=[Column_Type_Selection.Value 0 Decimal]
t2.at "floats" . to_vector . should_equal [0.0, 0.0, 0.0, 1.5, -1.2, 1.0, 0.0, 10000.0, 0.0]
Test.specify "should allow to specify a thousands separator and a custom decimal point" <|
opts = Data_Formatter.Value decimal_point=',' thousand_separator='_'
t1 = Table.new [["floats", ["0,0", "+0,0", "-0,0", "+1,5", "-1,2", "1,0", "0,0000", "10_000,", ",0"]]]
t2 = t1.parse_values opts column_types=[Column_Type_Selection.Value 0 Decimal]
t2.at "floats" . to_vector . should_equal [0.0, 0.0, 0.0, 1.5, -1.2, 1.0, 0.0, 10000.0, 0.0]
# As Decimal: "1.2" (wrong decimal point) and a leading/trailing '_' are expected
# to be rejected; "1_0_0" is expected to be accepted as 100.0.
t3 = Table.new [["xs", ["1,2", "1.2", "_0", "0_", "1_0_0"]]]
t4 = t3.parse_values opts column_types=[Column_Type_Selection.Value 0 Decimal]
t4.at "xs" . to_vector . should_equal [1.2, Nothing, Nothing, Nothing, 100.0]
Problems.get_attached_warnings t4 . should_equal [Invalid_Format.Error "xs" Decimal ["1.2", "_0", "0_"]]
# As Integer: "1,2" is additionally expected to be rejected.
# NOTE(review): the expected value for "1_0_0" is written as 100.0 even though
# the requested type is Integer - presumably relies on 100 == 100.0; confirm.
t5 = t3.parse_values opts column_types=[Column_Type_Selection.Value 0 Integer]
t5.at "xs" . to_vector . should_equal [Nothing, Nothing, Nothing, Nothing, 100.0]
Problems.get_attached_warnings t5 . should_equal [Invalid_Format.Error "xs" Integer ["1,2", "1.2", "_0", "0_"]]
t3 = Table.new [["xs", ["1,2", "1.2", "_0", "0_", "1_0_0"]]]
t4 = t3.parse_values opts column_types=[Column_Type_Selection.Value 0 Decimal]
t4.at "xs" . to_vector . should_equal [1.2, Nothing, Nothing, Nothing, 100.0]
Problems.get_attached_warnings t4 . should_equal [Invalid_Format.Error "xs" Decimal ["1.2", "_0", "0_"]]
t5 = t3.parse_values opts column_types=[Column_Type_Selection.Value 0 Integer]
t5.at "xs" . to_vector . should_equal [Nothing, Nothing, Nothing, Nothing, 100.0]
Problems.get_attached_warnings t5 . should_equal [Invalid_Format.Error "xs" Integer ["1,2", "1.2", "_0", "0_"]]
# Parses a Boolean column using custom true values ("1", "YES") and a custom
# false value ("0"), then checks that anything outside those lists - including
# "True"/"False" and differently-cased "no" - is expected to be rejected.
# NOTE(review): both halves of this test appear twice in a row - this looks like
# both sides of a re-indentation diff rendered without +/- markers; confirm
# against the real file.
Test.specify "should allow to specify custom values for booleans" <|
opts_1 = Data_Formatter.Value true_values=["1", "YES"] false_values=["0"]
t1 = Table.new [["bools", ["1", "0", "YES", "1", "0"]]]
t2 = t1.parse_values opts_1 column_types=[Column_Type_Selection.Value 0 Boolean]
t2.at "bools" . to_vector . should_equal [True, False, True, True, False]
Test.specify "should allow to specify custom values for booleans" <|
opts_1 = Data_Formatter.Value true_values=["1", "YES"] false_values=["0"]
t1 = Table.new [["bools", ["1", "0", "YES", "1", "0"]]]
t2 = t1.parse_values opts_1 column_types=[Column_Type_Selection.Value 0 Boolean]
t2.at "bools" . to_vector . should_equal [True, False, True, True, False]
# Values not listed in `opts_1` are expected to become Nothing and to be listed
# in a single Invalid_Format warning.
t3 = Table.new [["bools", ["1", "NO", "False", "True", "YES", "no", "oui", "0"]]]
t4 = t3.parse_values opts_1 column_types=[Column_Type_Selection.Value 0 Boolean]
t4.at "bools" . to_vector . should_equal [True, Nothing, Nothing, Nothing, True, Nothing, Nothing, False]
Problems.get_attached_warnings t4 . should_equal [Invalid_Format.Error "bools" Boolean ["NO", "False", "True", "no", "oui"]]
t3 = Table.new [["bools", ["1", "NO", "False", "True", "YES", "no", "oui", "0"]]]
t4 = t3.parse_values opts_1 column_types=[Column_Type_Selection.Value 0 Boolean]
t4.at "bools" . to_vector . should_equal [True, Nothing, Nothing, Nothing, True, Nothing, Nothing, False]
Problems.get_attached_warnings t4 . should_equal [Invalid_Format.Error "bools" Boolean ["NO", "False", "True", "no", "oui"]]
# Shared fixture for the whitespace tests: one text column per target type,
# mixing clean values with values that carry leading/trailing or embedded
# whitespace (plus an empty string and some Nothing entries).
whitespace_table =
    int_col = ["ints", ["0", "1 ", "0 1", " 2"]]
    float_col = ["floats", ["0 ", " 2.0", "- 1", "10.0"]]
    bool_col = ["bools", ["True ", " false", "t rue", "False"]]
    date_col = ["dates", [" 2022-01-01", "2022-07-17 ", "2022 - 07 - 17", ""]]
    datetime_col = ["datetimes", [" 2022-01-01 11:59:00 ", Nothing, "2022 - 07 - 17 1:2:3 ", "2022-01-01 11:59:00"]]
    time_col = ["times", ["11:00:00 ", " 00:00:00", "00 : 00 : 00", Nothing]]
    Table.new [int_col, float_col, bool_col, date_col, datetime_col, time_col]
# Shared fixture for the whitespace tests: one text column per target type,
# mixing clean values with values that carry leading/trailing or embedded
# whitespace (plus an empty string and some Nothing entries).
whitespace_table =
    int_col = ["ints", ["0", "1 ", "0 1", " 2"]]
    float_col = ["floats", ["0 ", " 2.0", "- 1", "10.0"]]
    bool_col = ["bools", ["True ", " false", "t rue", "False"]]
    date_col = ["dates", [" 2022-01-01", "2022-07-17 ", "2022 - 07 - 17", ""]]
    datetime_col = ["datetimes", [" 2022-01-01 11:59:00 ", Nothing, "2022 - 07 - 17 1:2:3 ", "2022-01-01 11:59:00"]]
    time_col = ["times", ["11:00:00 ", " 00:00:00", "00 : 00 : 00", Nothing]]
    Table.new [int_col, float_col, bool_col, date_col, datetime_col, time_col]
# With the default formatter, every column of `whitespace_table` is parsed to
# its explicit type. Values with only surrounding whitespace are expected to
# parse; values listed in the warnings below are expected to become Nothing.
Test.specify "should trim input values by default" <|
    selectors = [Column_Type_Selection.Value 0 Integer, Column_Type_Selection.Value "floats" Decimal, Column_Type_Selection.Value "bools" Boolean, Column_Type_Selection.Value "dates" Date, Column_Type_Selection.Value "datetimes" Date_Time, Column_Type_Selection.Value "times" Time_Of_Day]
    parsed = whitespace_table.parse_values column_types=selectors
    parsed.at "ints" . to_vector . should_equal [0, 1, Nothing, 2]
    parsed.at "floats" . to_vector . should_equal [0.0, 2.0, Nothing, 10.0]
    parsed.at "bools" . to_vector . should_equal [True, False, Nothing, False]
    parsed.at "dates" . to_vector . should_equal [Date.new 2022 1 1, Date.new 2022 7 17, Nothing, Nothing]
    parsed.at "datetimes" . to_vector . should_equal [Date_Time.new 2022 1 1 11 59, Nothing, Nothing, Nothing]
    parsed.at "times" . to_vector . should_equal [Time_Of_Day.new 11 0 0, Time_Of_Day.new, Nothing, Nothing]

    # One Invalid_Format warning per column; order between columns is not checked.
    actual = Problems.get_attached_warnings parsed
    expected = Vector.new_builder
    expected.append (Invalid_Format.Error "ints" Integer ["0 1"])
    expected.append (Invalid_Format.Error "floats" Decimal ["- 1"])
    expected.append (Invalid_Format.Error "bools" Boolean ["t rue"])
    expected.append (Invalid_Format.Error "dates" Date ["2022 - 07 - 17", ""])
    expected.append (Invalid_Format.Error "datetimes" Date_Time ["2022 - 07 - 17 1:2:3", "2022-01-01 11:59:00"])
    expected.append (Invalid_Format.Error "times" Time_Of_Day ["00 : 00 : 00"])
    actual.should_contain_the_same_elements_as expected.to_vector
# With the default formatter, every column of `whitespace_table` is parsed to
# its explicit type. Values with only surrounding whitespace are expected to
# parse; values listed in the warnings below are expected to become Nothing.
Test.specify "should trim input values by default" <|
    selectors = [Column_Type_Selection.Value 0 Integer, Column_Type_Selection.Value "floats" Decimal, Column_Type_Selection.Value "bools" Boolean, Column_Type_Selection.Value "dates" Date, Column_Type_Selection.Value "datetimes" Date_Time, Column_Type_Selection.Value "times" Time_Of_Day]
    parsed = whitespace_table.parse_values column_types=selectors
    parsed.at "ints" . to_vector . should_equal [0, 1, Nothing, 2]
    parsed.at "floats" . to_vector . should_equal [0.0, 2.0, Nothing, 10.0]
    parsed.at "bools" . to_vector . should_equal [True, False, Nothing, False]
    parsed.at "dates" . to_vector . should_equal [Date.new 2022 1 1, Date.new 2022 7 17, Nothing, Nothing]
    parsed.at "datetimes" . to_vector . should_equal [Date_Time.new 2022 1 1 11 59, Nothing, Nothing, Nothing]
    parsed.at "times" . to_vector . should_equal [Time_Of_Day.new 11 0 0, Time_Of_Day.new, Nothing, Nothing]

    # One Invalid_Format warning per column; order between columns is not checked.
    actual = Problems.get_attached_warnings parsed
    expected = Vector.new_builder
    expected.append (Invalid_Format.Error "ints" Integer ["0 1"])
    expected.append (Invalid_Format.Error "floats" Decimal ["- 1"])
    expected.append (Invalid_Format.Error "bools" Boolean ["t rue"])
    expected.append (Invalid_Format.Error "dates" Date ["2022 - 07 - 17", ""])
    expected.append (Invalid_Format.Error "datetimes" Date_Time ["2022 - 07 - 17 1:2:3", "2022-01-01 11:59:00"])
    expected.append (Invalid_Format.Error "times" Time_Of_Day ["00 : 00 : 00"])
    actual.should_contain_the_same_elements_as expected.to_vector
# Same explicit selectors as the trimming test, but with trim_values=False:
# every value listed in the warnings below is expected to become Nothing, and
# the date, date-time and time columns are expected to be entirely Nothing.
Test.specify "should fail to parse if whitespace is present and trimming is turned off" <|
    no_trim = Data_Formatter.Value trim_values=False
    selectors = [Column_Type_Selection.Value 0 Integer, Column_Type_Selection.Value "floats" Decimal, Column_Type_Selection.Value "bools" Boolean, Column_Type_Selection.Value "dates" Date, Column_Type_Selection.Value "datetimes" Date_Time, Column_Type_Selection.Value "times" Time_Of_Day]
    parsed = whitespace_table.parse_values no_trim column_types=selectors
    parsed.at "ints" . to_vector . should_equal [0, Nothing, Nothing, Nothing]
    parsed.at "floats" . to_vector . should_equal [Nothing, Nothing, Nothing, 10.0]
    parsed.at "bools" . to_vector . should_equal [Nothing, Nothing, Nothing, False]
    all_nothing = Vector.new 4 _->Nothing
    parsed.at "dates" . to_vector . should_equal all_nothing
    parsed.at "datetimes" . to_vector . should_equal all_nothing
    parsed.at "times" . to_vector . should_equal all_nothing

    # One Invalid_Format warning per column; order between columns is not checked.
    actual = Problems.get_attached_warnings parsed
    expected = Vector.new_builder
    expected.append (Invalid_Format.Error "ints" Integer ["1 ", "0 1", " 2"])
    expected.append (Invalid_Format.Error "floats" Decimal ["0 ", " 2.0", "- 1"])
    expected.append (Invalid_Format.Error "bools" Boolean ["True ", " false", "t rue"])
    expected.append (Invalid_Format.Error "dates" Date [" 2022-01-01", "2022-07-17 ", "2022 - 07 - 17", ""])
    expected.append (Invalid_Format.Error "datetimes" Date_Time [" 2022-01-01 11:59:00 ", "2022 - 07 - 17 1:2:3 ", "2022-01-01 11:59:00"])
    expected.append (Invalid_Format.Error "times" Time_Of_Day ["11:00:00 ", " 00:00:00", "00 : 00 : 00"])
    actual.should_contain_the_same_elements_as expected.to_vector
# Same explicit selectors as the trimming test, but with trim_values=False:
# every value listed in the warnings below is expected to become Nothing, and
# the date, date-time and time columns are expected to be entirely Nothing.
Test.specify "should fail to parse if whitespace is present and trimming is turned off" <|
    no_trim = Data_Formatter.Value trim_values=False
    selectors = [Column_Type_Selection.Value 0 Integer, Column_Type_Selection.Value "floats" Decimal, Column_Type_Selection.Value "bools" Boolean, Column_Type_Selection.Value "dates" Date, Column_Type_Selection.Value "datetimes" Date_Time, Column_Type_Selection.Value "times" Time_Of_Day]
    parsed = whitespace_table.parse_values no_trim column_types=selectors
    parsed.at "ints" . to_vector . should_equal [0, Nothing, Nothing, Nothing]
    parsed.at "floats" . to_vector . should_equal [Nothing, Nothing, Nothing, 10.0]
    parsed.at "bools" . to_vector . should_equal [Nothing, Nothing, Nothing, False]
    all_nothing = Vector.new 4 _->Nothing
    parsed.at "dates" . to_vector . should_equal all_nothing
    parsed.at "datetimes" . to_vector . should_equal all_nothing
    parsed.at "times" . to_vector . should_equal all_nothing

    # One Invalid_Format warning per column; order between columns is not checked.
    actual = Problems.get_attached_warnings parsed
    expected = Vector.new_builder
    expected.append (Invalid_Format.Error "ints" Integer ["1 ", "0 1", " 2"])
    expected.append (Invalid_Format.Error "floats" Decimal ["0 ", " 2.0", "- 1"])
    expected.append (Invalid_Format.Error "bools" Boolean ["True ", " false", "t rue"])
    expected.append (Invalid_Format.Error "dates" Date [" 2022-01-01", "2022-07-17 ", "2022 - 07 - 17", ""])
    expected.append (Invalid_Format.Error "datetimes" Date_Time [" 2022-01-01 11:59:00 ", "2022 - 07 - 17 1:2:3 ", "2022-01-01 11:59:00"])
    expected.append (Invalid_Format.Error "times" Time_Of_Day ["11:00:00 ", " 00:00:00", "00 : 00 : 00"])
    actual.should_contain_the_same_elements_as expected.to_vector
# Auto-detection with trim_values=False: each column contains at least one
# value with surrounding whitespace, so every column is expected to come back
# unchanged (equal to its input vector) and without any warnings.
# NOTE(review): the setup run and the assertion run each appear twice in a row -
# this looks like both sides of a re-indentation diff rendered without +/-
# markers; confirm against the real file.
Test.specify "should fallback to text if whitespace is present and trimming is turned off" <|
c1 = ["1", " +2", "-123", Nothing]
c2 = [" 1.0 ", "2.2", Nothing, "-1.0"]
c3 = ["true", " False", Nothing, "True"]
t = Table.new [["ints", c1], ["floats", c2], ["bools", c3]]
t2 = t.parse_values (Data_Formatter.Value trim_values=False)
Test.specify "should fallback to text if whitespace is present and trimming is turned off" <|
c1 = ["1", " +2", "-123", Nothing]
c2 = [" 1.0 ", "2.2", Nothing, "-1.0"]
c3 = ["true", " False", Nothing, "True"]
t = Table.new [["ints", c1], ["floats", c2], ["bools", c3]]
t2 = t.parse_values (Data_Formatter.Value trim_values=False)
Warning.get_all t2 . should_equal []
t2.at "ints" . to_vector . should_equal c1
t2.at "floats" . to_vector . should_equal c2
t2.at "bools" . to_vector . should_equal c3
Warning.get_all t2 . should_equal []
t2.at "ints" . to_vector . should_equal c1
t2.at "floats" . to_vector . should_equal c2
t2.at "bools" . to_vector . should_equal c3
# Specs for `Column.parse`: a single text column is parsed to a requested type
# (or auto-detected when no type is given), optionally with a format string.
Test.group "Column.parse" <|
    Test.specify "should correctly parse integers" <|
        # "000" and "0010" are expected to come back as Nothing
        # (presumably rejected for their leading zeros).
        raw = Column.from_vector "ints" ["0", "+0", "-0", "+1", "-1", "1", "000", "0010", "12345", Nothing]
        parsed = raw.parse Integer
        parsed.name.should_equal raw.name
        parsed.to_vector . should_equal [0, 0, 0, 1, -1, 1, Nothing, Nothing, 12345, Nothing]

    Test.specify "should correctly parse decimals" <|
        int_like = Column.from_vector "ints" ["0", "+0", "-0", "+1", "-1", "1", "000", "0010", "12345", Nothing]
        int_like_parsed = int_like.parse Decimal
        int_like_parsed.name.should_equal int_like.name
        int_like_parsed.to_vector . should_equal [0, 0, 0, 1, -1, 1, Nothing, Nothing, 12345, Nothing]
        # The text rendering pins that the values are decimals (note "-0.0").
        int_like_parsed.to_vector . map .to_text . should_equal ["0.0", "0.0", "-0.0", "1.0", "-1.0", "1.0", "Nothing", "Nothing", "12345.0", "Nothing"]

        # Explicit fractional part, including a bare trailing dot.
        with_fraction = Column.from_vector "floats" ["0.0", "+0.0", "-0.0", "+1.0", "-1.0", "1.0", "0.0000", "10.", "12345."]
        with_fraction.parse Decimal . to_vector . should_equal [0, 0, 0, 1, -1, 1, 0, 10, 12345]

        # Missing integer part or missing fractional digits.
        partial = Column.from_vector "floats" [".0", "0.", "1.", ".1", ".123", "-.1", "+.1", "+0.0", "0.1234", Nothing, "11111111.111"]
        partial.parse Decimal . to_vector . should_equal [0.0, 0.0, 1.0, 0.1, 0.123, -0.1, 0.1, 0.0, 0.1234, Nothing, 11111111.111]

    Test.specify "should correctly parse booleans" <|
        mixed_case = Column.from_vector "bools" ["true", "false", "True", "TRUE", "FALSE", Nothing, "False"]
        as_bool = mixed_case.parse Boolean
        as_bool.name.should_equal mixed_case.name
        as_bool.to_vector . should_equal [True, False, True, True, False, Nothing, False]

        # No type argument: the same column is expected to be detected as booleans.
        detected = mixed_case.parse
        detected.to_vector . should_equal [True, False, True, True, False, Nothing, False]

        # The second argument "yes|no" supplies the custom true/false texts.
        yes_no = Column.from_vector "bools" ["yes", "no", Nothing]
        yes_no.parse Boolean "yes|no" . to_vector . should_equal [True, False, Nothing]

    Test.specify "should correctly parse date and time" <|
        date_text = Column.from_vector "date" ["2022-05-07", "2000-01-01", "2010-12-31"]
        date_text.parse Date . to_vector . should_equal [Date.new 2022 5 7, Date.new 2000 1 1, Date.new 2010 12 31]

        datetime_text = Column.from_vector "datetimes" ["2022-05-07 23:59:59", "2000-01-01 00:00:00", "2010-12-31 12:34:56"]
        datetime_text.parse Date_Time . to_vector . should_equal [Date_Time.new 2022 5 7 23 59 59, Date_Time.new 2000 1 1, Date_Time.new 2010 12 31 12 34 56]

        time_text = Column.from_vector "times" ["23:59:59", "00:00:00", "12:34:56"]
        time_text.parse Time_Of_Day . to_vector . should_equal [Time_Of_Day.new 23 59 59, Time_Of_Day.new, Time_Of_Day.new 12 34 56]

    Test.specify "should correctly parse date and time with format" <|
        date_text = Column.from_vector "date" ["5/7/2022", "1/1/2000", "12/31/2010"]
        date_text.parse Date "M/d/yyyy" . to_vector . should_equal [Date.new 2022 5 7, Date.new 2000 1 1, Date.new 2010 12 31]

        datetime_text = Column.from_vector "datetimes" ["5/7/2022 23:59:59", "1/1/2000 00:00:00", "12/31/2010 12:34:56"]
        datetime_text.parse Date_Time "M/d/yyyy HH:mm:ss" . to_vector . should_equal [Date_Time.new 2022 5 7 23 59 59, Date_Time.new 2000 1 1, Date_Time.new 2010 12 31 12 34 56]
# Entry point: hands `spec` to `Test_Suite.run_main`.
main = Test_Suite.run_main spec

View File

@ -844,7 +844,7 @@ spec =
t.filter "X" (Filter_Condition.Is_In in_vector) on_problems=Report_Error . at "X" . to_vector . should_equal expected_vector
t.filter "X" (Filter_Condition.Is_In in_column) on_problems=Report_Error . at "X" . to_vector . should_equal expected_vector
t2 = t.set "Y" (t.at "X" . not)
t2 = t.set (t.at "X" . not) new_name="Y"
t2.filter "Y" (Filter_Condition.Is_In in_vector) on_problems=Report_Error . at "Y" . to_vector . should_equal expected_neg_vector
t2.filter "Y" (Filter_Condition.Is_In in_column) on_problems=Report_Error . at "Y" . to_vector . should_equal expected_neg_vector

View File

@ -111,11 +111,23 @@ spec = Test.group "List" <|
txt = ["aaa", "bbb", "abab", "cccc", "baaa", "ś"].to_list
txt.filter (Filter_Condition.Contains "a") . should_equal ["aaa", "abab", "baaa"].to_list
txt.filter (Filter_Condition.Contains 'A' Case_Sensitivity.Sensitive) . should_equal [].to_list
txt.filter (Filter_Condition.Contains 'A' Case_Sensitivity.Insensitive) . should_equal ["aaa", "abab", "baaa"].to_list
txt.filter (Filter_Condition.Contains 's\u0301') . should_equal ["ś"].to_list
txt.filter (Filter_Condition.Contains 'S\u0301' Case_Sensitivity.Sensitive) . should_equal [].to_list
txt.filter (Filter_Condition.Contains 'S\u0301' Case_Sensitivity.Insensitive) . should_equal ["ś"].to_list
txt.filter (Filter_Condition.Not_Contains "a") . should_equal ["bbb", "cccc", "ś"].to_list
txt.filter (Filter_Condition.Not_Contains "A" Case_Sensitivity.Sensitive) . should_equal ["aaa", "bbb", "abab", "cccc", "baaa", "ś"].to_list
txt.filter (Filter_Condition.Not_Contains "A" Case_Sensitivity.Insensitive) . should_equal ["bbb", "cccc", "ś"].to_list
txt.filter (Filter_Condition.Not_Contains 's\u0301') . should_equal ["aaa", "bbb", "abab", "cccc", "baaa"].to_list
txt.filter (Filter_Condition.Not_Contains 'S\u0301' Case_Sensitivity.Sensitive) . should_equal ["aaa", "bbb", "abab", "cccc", "baaa", "ś"].to_list
txt.filter (Filter_Condition.Not_Contains 'S\u0301' Case_Sensitivity.Insensitive) . should_equal ["aaa", "bbb", "abab", "cccc", "baaa"].to_list
txt.filter (Filter_Condition.Starts_With "a") . should_equal ["aaa", "abab"].to_list
txt.filter (Filter_Condition.Starts_With "A" Case_Sensitivity.Sensitive) . should_equal [].to_list
txt.filter (Filter_Condition.Starts_With "A" Case_Sensitivity.Insensitive) . should_equal ["aaa", "abab"].to_list
txt.filter (Filter_Condition.Ends_With "a") . should_equal ["aaa", "baaa"].to_list
txt.filter (Filter_Condition.Ends_With "A" Case_Sensitivity.Sensitive) . should_equal [].to_list
txt.filter (Filter_Condition.Ends_With "A" Case_Sensitivity.Insensitive) . should_equal ["aaa", "baaa"].to_list
txt.filter (Filter_Condition.Less than="a") . should_equal List.Nil
txt.filter (Filter_Condition.Greater than="b") . should_equal ["bbb", "cccc", "baaa", "ś"].to_list
txt.filter (Filter_Condition.Between "b" "c") . should_equal ["bbb", "baaa"].to_list

View File

@ -269,8 +269,8 @@ spec =
foo
bar\r\tbaz
text_1.pretty.should_equal "'foo\nbar\r\tbaz'"
text_2 = '\n\t\a\b\f\r\v\e\''
text_2.pretty.should_equal "'\n\t\a\b\f\r\v\e\''"
text_2 = '\n\t\a\b\f\r\v\e\'\\'
text_2.pretty.should_equal "'\n\t\a\b\f\r\v\e\'\\'"
Test.specify "should return text as is when converting to text" <|
text_1 = '''

View File

@ -50,8 +50,8 @@ foreign python generate_nested_py_array = """
return [[1, 2, 3], [4, 5]]
spec = Test.group "Vectors" <|
pending_python_missing = if Polyglot.is_language_installed "python" then Nothing else """
Can't run Python tests, Python is not installed.
pending_python_missing = if Polyglot.is_language_installed "python" then Nothing else
"Can't run Python tests, Python is not installed."
Test.specify "text bytes" <|
"Lore".utf_8 . should_equal [76, 111, 114, 101]
@ -191,11 +191,23 @@ spec = Test.group "Vectors" <|
txtvec = ["aaa", "bbb", "abab", "cccc", "baaa", "ś"]
txtvec.filter (Filter_Condition.Contains "a") . should_equal ["aaa", "abab", "baaa"]
txtvec.filter (Filter_Condition.Contains 'A' Case_Sensitivity.Sensitive) . should_equal []
txtvec.filter (Filter_Condition.Contains 'A' Case_Sensitivity.Insensitive) . should_equal ["aaa", "abab", "baaa"]
txtvec.filter (Filter_Condition.Contains 's\u0301') . should_equal ["ś"]
txtvec.filter (Filter_Condition.Contains 'S\u0301' Case_Sensitivity.Sensitive) . should_equal []
txtvec.filter (Filter_Condition.Contains 'S\u0301' Case_Sensitivity.Insensitive) . should_equal ["ś"]
txtvec.filter (Filter_Condition.Not_Contains "a") . should_equal ["bbb", "cccc", "ś"]
txtvec.filter (Filter_Condition.Not_Contains "A" Case_Sensitivity.Sensitive) . should_equal ["aaa", "bbb", "abab", "cccc", "baaa", "ś"]
txtvec.filter (Filter_Condition.Not_Contains "A" Case_Sensitivity.Insensitive) . should_equal ["bbb", "cccc", "ś"]
txtvec.filter (Filter_Condition.Not_Contains 's\u0301') . should_equal ["aaa", "bbb", "abab", "cccc", "baaa"]
txtvec.filter (Filter_Condition.Not_Contains 'S\u0301' Case_Sensitivity.Sensitive) . should_equal ["aaa", "bbb", "abab", "cccc", "baaa", "ś"]
txtvec.filter (Filter_Condition.Not_Contains 'S\u0301' Case_Sensitivity.Insensitive) . should_equal ["aaa", "bbb", "abab", "cccc", "baaa"]
txtvec.filter (Filter_Condition.Starts_With "a") . should_equal ["aaa", "abab"]
txtvec.filter (Filter_Condition.Starts_With "A" Case_Sensitivity.Sensitive) . should_equal []
txtvec.filter (Filter_Condition.Starts_With "A" Case_Sensitivity.Insensitive) . should_equal ["aaa", "abab"]
txtvec.filter (Filter_Condition.Ends_With "a") . should_equal ["aaa", "baaa"]
txtvec.filter (Filter_Condition.Ends_With "A" Case_Sensitivity.Sensitive) . should_equal []
txtvec.filter (Filter_Condition.Ends_With "A" Case_Sensitivity.Insensitive) . should_equal ["aaa", "baaa"]
txtvec.filter (Filter_Condition.Less than="a") . should_equal []
txtvec.filter (Filter_Condition.Greater than="b") . should_equal ["bbb", "cccc", "baaa", "ś"]
txtvec.filter (Filter_Condition.Between "b" "c") . should_equal ["bbb", "baaa"]

View File

@ -37,7 +37,7 @@ spec =
Widgets.get_full_annotations_json mock_table "at" ["selector"] . should_equal expect
# Checks the widget annotations reported for `filter` on `mock_table`.
# NOTE(review): `expect` and the assertion each appear twice - once for the
# three arguments "column", "filter", "on_problems" and once for "column" only.
# This looks like the removed and added sides of a diff hunk rendered without
# +/- markers; confirm which pair the real file keeps.
Test.specify "works for `filter`" <|
expect = [["column", Widget.Single_Choice (mock_table.column_names.map .pretty) Nothing Display.Always], ["filter", Nothing], ["on_problems", Nothing]] . to_json
Widgets.get_full_annotations_json mock_table "filter" ["column", "filter", "on_problems"] . should_equal expect
expect = [["column", Widget.Single_Choice (mock_table.column_names.map .pretty) Nothing Display.Always]] . to_json
Widgets.get_full_annotations_json mock_table "filter" ["column"] . should_equal expect
# Entry point: hands `spec` to `Test_Suite.run_main`.
main = Test_Suite.run_main spec

View File

@ -15,12 +15,12 @@ spec =
mock_table = Table.from_rows ["A", "B C", 'D "E"', "F.G/H\I"] []
# Checks that `get` and `at` report the same single-choice widget for their
# "selector" argument, listing the column names of `mock_table`.
# NOTE(review): `expect` is bound twice - once via `column_names.map .pretty`
# and once as a hard-coded list of quoted names. This looks like the removed
# and added sides of a diff hunk rendered without +/- markers; confirm which
# one the real file keeps.
Test.specify "works for `get` and `at`" <|
expect = [["selector", Widget.Single_Choice (mock_table.column_names.map .pretty) Nothing Display.Always]] . to_json
expect = [["selector", Widget.Single_Choice ["'A'", "'B C'", '\'D "E"\'', "'F.G/H\\I'"] Nothing Display.Always]] . to_json
Widgets.get_full_annotations_json mock_table "get" ["selector"] . should_equal expect
Widgets.get_full_annotations_json mock_table "at" ["selector"] . should_equal expect
# Checks the widget annotations reported for `filter` on `mock_table`.
# NOTE(review): `expect` and the assertion each appear twice - once for the
# three arguments "column", "filter", "on_problems" (names via `.pretty`) and
# once for "column" only (hard-coded quoted names). This looks like the removed
# and added sides of a diff hunk rendered without +/- markers; confirm which
# pair the real file keeps.
Test.specify "works for `filter`" <|
expect = [["column", Widget.Single_Choice (mock_table.column_names.map .pretty) Nothing Display.Always], ["filter", Nothing], ["on_problems", Nothing]] . to_json
Widgets.get_full_annotations_json mock_table "filter" ["column", "filter", "on_problems"] . should_equal expect
expect = [["column", Widget.Single_Choice ["'A'", "'B C'", '\'D "E"\'', "'F.G/H\\I'"] Nothing Display.Always]] . to_json
Widgets.get_full_annotations_json mock_table "filter" ["column"] . should_equal expect
# Entry point: hands `spec` to `Test_Suite.run_main`.
main = Test_Suite.run_main spec