mirror of
https://github.com/enso-org/enso.git
synced 2025-01-03 16:23:25 +03:00
New set function, parse a column (#4097)
- New `set` function design - takes a `Column` and works with that more easily and supports control of `Set_Mode`. - New simple `parse` API on `Column`. - Separated expression support for `filter` to new `filter_by_expression` on `Table`. - New `compute` function allowing creation of a column from an expression. - Added case sensitivity argument to `Column` based on `starts_with`, `ends_with` and `contains`. - Added case sensitivity argument to `Filter_Condition` for `Starts_With`, `Ends_With`, `Contains` and `Not_Contains`. - Fixed the issue in JS Table visualisation where JavaScript date was incorrectly set. - Some dynamic dropdown expressions - experimenting with ways to use them. - Fixed issue with `.pretty` that wasn't escaping `\`. - Changed default Postgres DB to `postgres`. - Fixed SQLite support for starts_with, ends_with and contains to be consistent (using GLOB not LIKE).
This commit is contained in:
parent
c965ad3455
commit
0790ce494f
@ -220,8 +220,8 @@
|
||||
API and added builders for customizing less common settings.][3516]
|
||||
- [Allow control of sort direction in `First` and `Last` aggregations.][3517]
|
||||
- [Implemented `Text.write`, replacing `File.write_text`.][3518]
|
||||
- [Removed obsolete `select`, `group`, `sort` and related types from tables.]
|
||||
[3519]
|
||||
- [Removed obsolete `select`, `group`, `sort` and related types from
|
||||
tables.][3519]
|
||||
- [Removed obsolete `from_xls` and `from_xlsx` functions. Added support for
|
||||
reading column names from first row in `File_Format.Excel`][3523]
|
||||
- [Added `File_Format.Delimited` support to `Table.write` for new files.][3528]
|
||||
@ -298,6 +298,8 @@
|
||||
backend.][4063]
|
||||
- [Updated `Text.starts_with`, `Text.ends_with` and `Text.contains` to new
|
||||
simpler API.][4078]
|
||||
- [Updated `Table.set` to new API. New `Column.parse` function and added case
|
||||
sensitivity to `Filter_Condition` and column functions.][4097]
|
||||
|
||||
[debug-shortcuts]:
|
||||
https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug
|
||||
@ -459,6 +461,7 @@
|
||||
[4052]: https://github.com/enso-org/enso/pull/4052
|
||||
[4063]: https://github.com/enso-org/enso/pull/4063
|
||||
[4078]: https://github.com/enso-org/enso/pull/4078
|
||||
[4097]: https://github.com/enso-org/enso/pull/4097
|
||||
|
||||
#### Enso Compiler
|
||||
|
||||
@ -590,7 +593,7 @@
|
||||
[3631]: https://github.com/enso-org/enso/pull/3631
|
||||
[3633]: https://github.com/enso-org/enso/pull/3633
|
||||
[3637]: https://github.com/enso-org/enso/pull/3637
|
||||
[3637]: https://github.com/enso-org/enso/pull/3638
|
||||
[3638]: https://github.com/enso-org/enso/pull/3638
|
||||
[3641]: https://github.com/enso-org/enso/pull/3641
|
||||
[3658]: https://github.com/enso-org/enso/pull/3658
|
||||
[3671]: https://github.com/enso-org/enso/pull/3671
|
||||
|
@ -179,14 +179,14 @@ class TableVisualization extends Visualization {
|
||||
if (content instanceof Object) {
|
||||
const type = content.type
|
||||
if (type === 'Date') {
|
||||
to_render = new Date(content.year, content.month, content.day)
|
||||
to_render = new Date(content.year, content.month - 1, content.day)
|
||||
.toISOString()
|
||||
.substring(0, 10)
|
||||
} else if (type === 'Time_Of_Day') {
|
||||
const js_date = new Date(
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
1,
|
||||
content.hour,
|
||||
content.minute,
|
||||
content.second,
|
||||
@ -198,7 +198,7 @@ class TableVisualization extends Visualization {
|
||||
} else if (type === 'Date_Time') {
|
||||
const js_date = new Date(
|
||||
content.year,
|
||||
content.month,
|
||||
content.month - 1,
|
||||
content.day,
|
||||
content.hour,
|
||||
content.minute,
|
||||
|
@ -1,4 +1,5 @@
|
||||
import project.Any.Any
|
||||
import project.Data.Text.Case_Sensitivity.Case_Sensitivity
|
||||
import project.Data.Text.Extensions
|
||||
import project.Data.Text.Regex
|
||||
import project.Data.Text.Text
|
||||
@ -7,6 +8,9 @@ import project.Nothing.Nothing
|
||||
|
||||
from project.Data.Boolean import all
|
||||
|
||||
from project.Metadata.Widget import Single_Choice
|
||||
import project.Metadata.Display
|
||||
|
||||
polyglot java import org.enso.base.Regex_Utils
|
||||
|
||||
from project.Data.Filter_Condition.Filter_Condition import all
|
||||
@ -39,7 +43,7 @@ type Filter_Condition
|
||||
It accepts a Text value to check if the value contains it. In case of
|
||||
Table operations, it can accept another column - then the corresponding
|
||||
values from the source column and the provided column are checked.
|
||||
Starts_With (prefix:Text)
|
||||
Starts_With (prefix:Text) (case_sensitivity:Case_Sensitivity=Case_Sensitivity.Default)
|
||||
|
||||
## Does the value end with a suffix (Text only)?
|
||||
|
||||
@ -47,7 +51,7 @@ type Filter_Condition
|
||||
It accepts a Text value to check if the value contains it. In case of
|
||||
Table operations, it can accept another column - then the corresponding
|
||||
values from the source column and the provided column are checked.
|
||||
Ends_With (suffix:Text)
|
||||
Ends_With (suffix:Text) (case_sensitivity:Case_Sensitivity=Case_Sensitivity.Default)
|
||||
|
||||
## Does the value contain the substring (Text only)?
|
||||
|
||||
@ -55,7 +59,7 @@ type Filter_Condition
|
||||
It accepts a Text value to check if the value contains it. In case of
|
||||
Table operations, it can accept another column - then the corresponding
|
||||
values from the source column and the provided column are checked.
|
||||
Contains (substring:Text)
|
||||
Contains (substring:Text) (case_sensitivity:Case_Sensitivity=Case_Sensitivity.Default)
|
||||
|
||||
## Is the substring not present in the value (Text only)?
|
||||
|
||||
@ -63,7 +67,7 @@ type Filter_Condition
|
||||
It accepts a Text value to check if the value contains it. In case of
|
||||
Table operations, it can accept another column - then the corresponding
|
||||
values from the source column and the provided column are checked.
|
||||
Not_Contains (substring:Text)
|
||||
Not_Contains (substring:Text) (case_sensitivity:Case_Sensitivity=Case_Sensitivity.Default)
|
||||
|
||||
## Is equal to Nothing?
|
||||
Is_Nothing
|
||||
@ -165,10 +169,10 @@ type Filter_Condition
|
||||
Not_Equal value -> !=value
|
||||
Between lower upper -> elem ->
|
||||
(lower <= elem) && (elem <= upper)
|
||||
Starts_With prefix -> _.starts_with prefix
|
||||
Ends_With suffix -> _.ends_with suffix
|
||||
Contains substring -> _.contains substring
|
||||
Not_Contains substring -> v-> v.contains substring . not
|
||||
Starts_With prefix case_sensitivity -> _.starts_with prefix case_sensitivity
|
||||
Ends_With suffix case_sensitivity -> _.ends_with suffix case_sensitivity
|
||||
Contains substring case_sensitivity -> _.contains substring case_sensitivity
|
||||
Not_Contains substring case_sensitivity -> v-> v.contains substring case_sensitivity . not
|
||||
Is_Nothing -> elem -> case elem of
|
||||
Nothing -> True
|
||||
_ -> False
|
||||
@ -196,6 +200,13 @@ type Filter_Condition
|
||||
Is_In values -> values.contains
|
||||
Not_In values -> elem -> values.contains elem . not
|
||||
|
||||
## PRIVATE
|
||||
Gets a widget set up for a Filter_Condition.
|
||||
widget_for_filter_condition =
|
||||
## values = ["(Filter_Condition.Equal to=_)", "(Filter_Condition.Not_Equal to=_)", "(Filter_Condition.Is_In values=_)", "(Filter_Condition.Not_In values=_)", "Filter_Condition.Is_True", "Filter_Condition.Is_False", "Filter_Condition.Is_Nothing", "Filter_Condition.Not_Nothing", "Filter_Condition.Is_Empty", "Filter_Condition.Not_Empty", "(Filter_Condition.Less than=_)", "(Filter_Condition.Equal_Or_Less than=_)", "(Filter_Condition.Greater than=_)", "(Filter_Condition.Equal_Or_Greater than=_)", "(Filter_Condition.Between lower=_ upper=_)", "(Filter_Condition.Starts_With prefix=_)", "(Filter_Condition.Ends_With suffix=_)", "(Filter_Condition.Contains substring=_)", "(Filter_Condition.Not_Contains substring=_)", "(Filter_Condition.Like pattern=_)", "(Filter_Condition.Not_Like pattern=_)"]
|
||||
values = ["(Filter_Condition.Equal _)", "(Filter_Condition.Not_Equal _)", "(Filter_Condition.Is_In _)", "(Filter_Condition.Not_In _)", "Filter_Condition.Is_True", "Filter_Condition.Is_False", "Filter_Condition.Is_Nothing", "Filter_Condition.Not_Nothing", "Filter_Condition.Is_Empty", "Filter_Condition.Not_Empty", "(Filter_Condition.Less _)", "(Filter_Condition.Equal_Or_Less _)", "(Filter_Condition.Greater _)", "(Filter_Condition.Equal_Or_Greater _)", "(Filter_Condition.Between _ _)", "(Filter_Condition.Starts_With _)", "(Filter_Condition.Ends_With _)", "(Filter_Condition.Contains _)", "(Filter_Condition.Not_Contains _)", "(Filter_Condition.Like _)", "(Filter_Condition.Not_Like _)"]
|
||||
Single_Choice values display=Display.Always
|
||||
|
||||
## PRIVATE
|
||||
sql_like_to_regex sql_pattern =
|
||||
regex_pattern = Regex_Utils.sql_like_pattern_to_regex sql_pattern
|
||||
|
@ -742,7 +742,7 @@ Text.starts_with self prefix case_sensitivity=Case_Sensitivity.Sensitive = case
|
||||
Case_Sensitivity.Default -> self.starts_with prefix Case_Sensitivity.Sensitive
|
||||
Case_Sensitivity.Sensitive -> Text_Utils.starts_with self prefix
|
||||
Case_Sensitivity.Insensitive locale ->
|
||||
self.take (Index_Sub_Range.First prefix.length) . equals_ignore_case prefix locale=locale
|
||||
self.take (Index_Sub_Range.First prefix.length) . equals_ignore_case prefix locale=locale
|
||||
|
||||
## ALIAS Check Suffix
|
||||
|
||||
@ -770,7 +770,7 @@ Text.ends_with self suffix case_sensitivity=Case_Sensitivity.Sensitive = case ca
|
||||
Case_Sensitivity.Default -> self.ends_with suffix Case_Sensitivity.Sensitive
|
||||
Case_Sensitivity.Sensitive -> Text_Utils.ends_with self suffix
|
||||
Case_Sensitivity.Insensitive locale ->
|
||||
self.take (Index_Sub_Range.Last suffix.length) . equals_ignore_case suffix locale=locale
|
||||
self.take (Index_Sub_Range.Last suffix.length) . equals_ignore_case suffix locale=locale
|
||||
|
||||
## ALIAS Contains
|
||||
|
||||
@ -812,7 +812,7 @@ Text.contains self term="" case_sensitivity=Case_Sensitivity.Sensitive = case ca
|
||||
Case_Sensitivity.Default -> self.contains term Case_Sensitivity.Sensitive
|
||||
Case_Sensitivity.Sensitive -> Text_Utils.contains self term
|
||||
Case_Sensitivity.Insensitive locale ->
|
||||
Text_Utils.contains_case_insensitive self term locale.java_locale
|
||||
Text_Utils.contains_case_insensitive self term locale.java_locale
|
||||
|
||||
## Takes an integer and returns a new text, consisting of `count` concatenated
|
||||
copies of `self`.
|
||||
|
@ -17,8 +17,16 @@ type Display
|
||||
## Parameter is only shown on the expanded view.
|
||||
Expanded_Only
|
||||
|
||||
type Parameter_Type
|
||||
Parameter value:Text label:Text="code" parameters:(Vector Widget)=[] icon:Text=""
|
||||
type Choice
|
||||
## Describes an entry in a Single_Choice or Multiple_Choice widget.
|
||||
|
||||
Fields:
|
||||
- value: The code to insert for the entry.
|
||||
- label: The text to display for the entry. By default, the `value` is used.
|
||||
- parameters: A list of parameters for the arguments for the `value`.
|
||||
This provides the structure needed for nested widgets.
|
||||
- icon: The icon to display for the entry. By default, no icon is used.
|
||||
Option value:Text label:Text=value parameters:(Vector (Pair Text Widget))=[] icon:Text=""
|
||||
|
||||
type File_Action
|
||||
## The File or Folder is for reading from.
|
||||
@ -27,30 +35,40 @@ type File_Action
|
||||
## The File or Folder is for writing to.
|
||||
Save
|
||||
|
||||
|
||||
type Widget
|
||||
## Describe a code parameter.
|
||||
Code_Input label:(Nothing|Text)=Nothing display:Display=Display.When_Modified
|
||||
Code_Input label:(Nothing | Text)=Nothing display:Display=Display.When_Modified
|
||||
|
||||
## Describe a boolean parameter.
|
||||
Boolean_Input label:Nothing|Text=Nothing display:Display=Display.When_Modified
|
||||
Boolean_Input label:(Nothing | Text)=Nothing display:Display=Display.When_Modified
|
||||
|
||||
## Describe a numeric parameter.
|
||||
Numeric_Input label:Nothing|Text=Nothing display:Display=Display.When_Modified minimum:Integer|Nothing=Nothing maximum:Integer|Nothing=Nothing step:Number=1 decimal_places:Integer=0 allow_outside:Boolean=True
|
||||
Numeric_Input label:(Nothing | Text)=Nothing display:Display=Display.When_Modified minimum:Integer|Nothing=Nothing maximum:Integer|Nothing=Nothing step:Number=1 decimal_places:Integer=0 allow_outside:Boolean=True
|
||||
|
||||
## Describes a text widget.
|
||||
Text_Input label:Nothing|Text=Nothing display:Display=Display.When_Modified quote_values:Boolean=True suggestions:(Vector Text)=[]
|
||||
Text_Input label:(Nothing | Text)=Nothing display:Display=Display.When_Modified quote_values:Boolean=True suggestions:(Vector Text)=[]
|
||||
|
||||
## Describes a single value widget.
|
||||
Single_Choice values:(Vector Parameter) label:Nothing|Text=Nothing display:Display=Display.When_Modified quote_values:Boolean=False allow_custom:Boolean=True
|
||||
## Describes a single value widget (dropdown).
|
||||
|
||||
Fields:
|
||||
- values: A list of choices to display.
|
||||
If a `Text` value is used, it is treated as `Option value:Text`.
|
||||
- label: The text to display for the widget.
|
||||
By default, the parameter name is used.
|
||||
- display: The display mode for the parameter.
|
||||
- quote_values: Should the values be quoted automatically?
|
||||
- allow_custom: Allow the user to enter a value not in the list?
|
||||
Single_Choice values:(Vector (Choice | Text)) label:(Nothing | Text)=Nothing display:Display=Display.When_Modified quote_values:Boolean=False allow_custom:Boolean=True
|
||||
|
||||
## Describes a multi value widget.
|
||||
Multiple_Choice values:(Vector Parameter) label:Nothing|Text=Nothing display:Display=Display.When_Modified quote_values:Boolean=False
|
||||
Multiple_Choice values:(Vector (Choice | Text)) label:(Nothing | Text)=Nothing display:Display=Display.When_Modified quote_values:Boolean=False
|
||||
|
||||
## Describes a list editor widget.
|
||||
Vector_Editor item_editor:Widget values:((Vector Parameter)|Nothing)=Nothing label:Nothing|Text=Nothing display:Display=Display.When_Modified
|
||||
Vector_Editor item_editor:Widget values:((Vector (Choice | Text)) | Nothing)=Nothing label:(Nothing | Text)=Nothing display:Display=Display.When_Modified
|
||||
|
||||
## Describes a folder chooser.
|
||||
Folder_Browse label:Nothing|Text=Nothing display:Display=Display.When_Modified
|
||||
Folder_Browse label:(Nothing | Text)=Nothing display:Display=Display.When_Modified
|
||||
|
||||
## Describes a file chooser.
|
||||
File_Browse label:Nothing|Text=Nothing display:Display=Display.When_Modified action:File_Action=File_Action.Open file_types:(Vector Pair)=[Pair "All Files" "*.*"]
|
||||
File_Browse label:(Nothing | Text)=Nothing display:Display=Display.When_Modified action:File_Action=File_Action.Open file_types:(Vector Pair)=[Pair.new "All Files" "*.*"]
|
||||
|
@ -17,9 +17,13 @@ type Postgres_Options
|
||||
Arguments:
|
||||
- host: The hostname of the database server (defaults to localhost).
|
||||
- port: The port of the database server (defaults to 5432).
|
||||
- database: The database to connect to. If empty, the default database will be used.
|
||||
- schema: The schema to connect to. If empty, the default schema will be used.
|
||||
- credentials: The credentials to use for the connection (defaults to PGPass or No Authentication).
|
||||
- database: The database to connect to. By default, it will use the
|
||||
database provided in `PGDATABASE` environment variable, or if that is
|
||||
not defined, it will fall back to a default database name: `postgres`.
|
||||
- schema: The schema to connect to. If empty, the default schema will be
|
||||
used.
|
||||
- credentials: The credentials to use for the connection (defaults to
|
||||
PGPass or No Authentication).
|
||||
- use_ssl: Whether to use SSL (defaults to `SSL_Mode.Prefer`).
|
||||
- client_cert: The client certificate to use or `Nothing` if not needed.
|
||||
Postgres (host:Text=default_postgres_host) (port:Integer=default_postgres_port) (database:Text=default_postgres_database) (schema:Text="") (credentials:(Credentials|Nothing)=Nothing) (use_ssl:SSL_Mode=SSL_Mode.Prefer) (client_cert:(Client_Certificate|Nothing)=Nothing)
|
||||
@ -97,4 +101,4 @@ default_postgres_port =
|
||||
port -> Integer.parse port . catch Number_Parse_Error.Error (_->hardcoded_port)
|
||||
|
||||
## PRIVATE
|
||||
default_postgres_database = Environment.get "PGDATABASE" ""
|
||||
default_postgres_database = Environment.get "PGDATABASE" "postgres"
|
||||
|
@ -5,6 +5,8 @@ import Standard.Base.Error.Illegal_State.Illegal_State
|
||||
from Standard.Table import Sort_Column
|
||||
import Standard.Table.Data.Value_Type.Value_Type
|
||||
import Standard.Table.Data.Column.Column as Materialized_Column
|
||||
import Standard.Table.Data.Column_Type_Selection.Auto
|
||||
import Standard.Table.Data.Value_Type.Value_Type
|
||||
|
||||
import project.Data.SQL_Statement.SQL_Statement
|
||||
import project.Data.SQL_Type.SQL_Type
|
||||
@ -16,7 +18,7 @@ import project.Internal.IR.Query.Query
|
||||
|
||||
from project.Data.Table import Table, freshen_columns
|
||||
|
||||
from project.Errors import Unsupported_Database_Operation, Integrity_Error
|
||||
from project.Errors import Unsupported_Database_Operation, Integrity_Error, Unsupported_Name
|
||||
|
||||
type Column
|
||||
|
||||
@ -612,10 +614,9 @@ type Column
|
||||
|
||||
Arguments:
|
||||
- new_name: The name to rename `self` column to.
|
||||
rename : Text -> Column
|
||||
rename self new_name = case Helpers.ensure_name_is_sane new_name of
|
||||
True ->
|
||||
Column.Value new_name self.connection self.sql_type self.expression self.context
|
||||
rename : Text -> Column ! Unsupported_Name
|
||||
rename self new_name = Helpers.ensure_name_is_sane new_name <|
|
||||
Column.Value new_name self.connection self.sql_type self.expression self.context
|
||||
|
||||
## UNSTABLE
|
||||
|
||||
@ -669,13 +670,16 @@ type Column
|
||||
|
||||
Arguments:
|
||||
- other: A column or text to check for each item in `self`.
|
||||
- case_sensitivity: Specifies if the text values should be compared case
|
||||
sensitively.
|
||||
|
||||
If `other` is a column, the operation is performed pairwise between
|
||||
corresponding elements of `self` and `other`. If the argument is a
|
||||
missing value (a Nothing or a column with missing values), the behaviour
|
||||
on these missing values is vendor specific.
|
||||
starts_with : Column | Text -> Column
|
||||
starts_with self other = self.make_binary_op "starts_with" other new_type=SQL_Type.boolean
|
||||
starts_with : Column | Text -> Case_Sensitivity -> Column
|
||||
starts_with self other case_sensitivity=Case_Sensitivity.Default =
|
||||
make_text_case_op self "starts_with" other case_sensitivity
|
||||
|
||||
## UNSTABLE
|
||||
|
||||
@ -683,13 +687,16 @@ type Column
|
||||
|
||||
Arguments:
|
||||
- other: A column or text to check for each item in `self`.
|
||||
- case_sensitivity: Specifies if the text values should be compared case
|
||||
sensitively.
|
||||
|
||||
If `other` is a column, the operation is performed pairwise between
|
||||
corresponding elements of `self` and `other`. If the argument is a
|
||||
missing value (a Nothing or a column with missing values), the behaviour
|
||||
on these missing values is vendor specific.
|
||||
ends_with : Column | Text -> Column
|
||||
ends_with self other = self.make_binary_op "ends_with" other new_type=SQL_Type.boolean
|
||||
ends_with : Column | Text -> Case_Sensitivity -> Column
|
||||
ends_with self other case_sensitivity=Case_Sensitivity.Default =
|
||||
make_text_case_op self "ends_with" other case_sensitivity
|
||||
|
||||
## UNSTABLE
|
||||
|
||||
@ -697,13 +704,16 @@ type Column
|
||||
|
||||
Arguments:
|
||||
- other: A column or text to check for each item in `self`.
|
||||
- case_sensitivity: Specifies if the text values should be compared case
|
||||
sensitively.
|
||||
|
||||
If `other` is a column, the operation is performed pairwise between
|
||||
corresponding elements of `self` and `other`. If the argument is a
|
||||
missing value (a Nothing or a column with missing values), the behaviour
|
||||
on these missing values is vendor specific.
|
||||
contains : Column | Text -> Column
|
||||
contains self other = self.make_binary_op "contains" other new_type=SQL_Type.boolean
|
||||
contains : Column | Text -> Case_Sensitivity -> Column
|
||||
contains self other case_sensitivity=Case_Sensitivity.Default =
|
||||
make_text_case_op self "contains" other case_sensitivity
|
||||
|
||||
## Checks for each element of the column if it matches an SQL-like pattern.
|
||||
|
||||
@ -780,6 +790,44 @@ type Column
|
||||
new_expr = SQL_Expression.Operation "IS_IN_COLUMN" [self.expression, in_subquery, has_nulls_subquery]
|
||||
Column.Value self.name self.connection SQL_Type.boolean new_expr self.context
|
||||
|
||||
## Parses a text column into values
|
||||
|
||||
Arguments:
|
||||
- type: Target type to parse the value to.
|
||||
If `Auto`, will attempt to deduce the type automatically.
|
||||
- format: For date/time types, the format to parse the value with.
|
||||
For boolean types, two values that represent true and false separated
|
||||
by `|`.
|
||||
- on_problems: What to do if the value cannot be parsed.
|
||||
By default, an `Invalid_Format` warning is attached and the value is
|
||||
set to `Nothing`.
|
||||
|
||||
> Example
|
||||
Parse dates in a column in the format `yyyy-MM-dd` (the default format).
|
||||
|
||||
import Standard.Examples
|
||||
|
||||
example_parse = Examples.text_column_1.parse Date
|
||||
|
||||
> Example
|
||||
Parse dates in a column in the format `dd/MM/yyyy`
|
||||
|
||||
import Standard.Examples
|
||||
|
||||
example_parse = Examples.text_column_1.parse Date 'dd/MM/yyyy'
|
||||
|
||||
> Example
|
||||
Parse a Yes/No column into a boolean column.
|
||||
|
||||
import Standard.Examples
|
||||
|
||||
example_parse = Examples.text_column_1.parse Boolean 'Yes|No'
|
||||
@type (Single_Choice ['Auto','Integer','Decimal','Date','Date_Time','Time_Of_Day','Boolean'] display=Display.Always)
|
||||
parse : (Auto|Integer|Decimal|Date|Date_Time|Time_Of_Day|Boolean) -> Text | Nothing -> Problem_Behavior -> Column
|
||||
parse self type=Auto format=Nothing on_problems=Report_Warning =
|
||||
_ = [type, format, on_problems]
|
||||
Error.throw <| Unsupported_Database_Operation.Error "`Column.parse` is not implemented yet for the Database backends."
|
||||
|
||||
## PRIVATE
|
||||
as_internal : Internal_Column
|
||||
as_internal self = Internal_Column.Value self.name self.sql_type self.expression
|
||||
@ -799,3 +847,15 @@ get_approximate_type value default = case value of
|
||||
_ : Column -> value.sql_type
|
||||
Nothing -> default
|
||||
_ -> SQL_Type.approximate_type value
|
||||
|
||||
## PRIVATE
|
||||
Helper for text operations that take a `case_sensitivity` argument.
|
||||
make_text_case_op left op other case_sensitivity =
|
||||
Value_Type.expect_text left.value_type <| case case_sensitivity of
|
||||
Case_Sensitivity.Default -> left.make_binary_op op other new_type=SQL_Type.boolean
|
||||
Case_Sensitivity.Sensitive ->
|
||||
cs_other = if other.is_a Column then other.make_unary_op "MAKE_CASE_SENSITIVE" else other
|
||||
left.make_unary_op "MAKE_CASE_SENSITIVE" . make_binary_op op cs_other new_type=SQL_Type.boolean
|
||||
Case_Sensitivity.Insensitive locale -> Helpers.assume_default_locale locale <|
|
||||
ci_other = if other.is_a Column then other.make_unary_op "FOLD_CASE" else other.to_case Case.Lower
|
||||
left.make_unary_op "FOLD_CASE" . make_binary_op op ci_other new_type=SQL_Type.boolean
|
||||
|
@ -11,6 +11,7 @@ import Standard.Base.Error.Unimplemented.Unimplemented
|
||||
from Standard.Base.Metadata.Widget import Single_Choice
|
||||
import Standard.Base.Metadata.Display
|
||||
|
||||
from Standard.Table import Auto_Detect, Aggregate_Column, Data_Formatter, Column_Name_Mapping, Column_Selector, Sort_Column_Selector, Sort_Column, Match_Columns, Position, Set_Mode
|
||||
from Standard.Table import Auto_Detect, Aggregate_Column, Data_Formatter, Column_Name_Mapping, Column_Selector, Sort_Column, Match_Columns, Position
|
||||
import Standard.Table.Data.Column_Type_Selection.Column_Type_Selection
|
||||
import Standard.Table.Data.Expression.Expression
|
||||
@ -20,6 +21,8 @@ import Standard.Table.Data.Join_Kind.Join_Kind
|
||||
import Standard.Table.Data.Report_Unmatched.Report_Unmatched
|
||||
import Standard.Table.Data.Row.Row
|
||||
import Standard.Table.Data.Table.Table as Materialized_Table
|
||||
import Standard.Table.Data.Value_Type.Value_Type
|
||||
import Standard.Table.Internal.Java_Exports
|
||||
import Standard.Table.Internal.Table_Helpers
|
||||
import Standard.Table.Internal.Table_Helpers.Table_Column_Helper
|
||||
import Standard.Table.Internal.Problem_Builder.Problem_Builder
|
||||
@ -27,7 +30,7 @@ import Standard.Table.Internal.Aggregate_Column_Helper
|
||||
from Standard.Table.Data.Column import get_item_string
|
||||
from Standard.Table.Data.Table import print_table
|
||||
from Standard.Table.Internal.Filter_Condition_Helpers import make_filter_column
|
||||
from Standard.Table.Errors import Column_Count_Mismatch, No_Index_Set_Error, No_Such_Column, No_Input_Columns_Selected, No_Output_Columns
|
||||
from Standard.Table.Errors import Column_Count_Mismatch, No_Index_Set_Error, No_Such_Column, No_Input_Columns_Selected, No_Output_Columns, Existing_Column, Missing_Column
|
||||
|
||||
import project.Data.Column.Column
|
||||
import project.Data.SQL_Statement.SQL_Statement
|
||||
@ -42,7 +45,7 @@ import project.Internal.IR.Internal_Column.Internal_Column
|
||||
import project.Internal.IR.SQL_Join_Kind.SQL_Join_Kind
|
||||
import project.Internal.IR.Query.Query
|
||||
|
||||
from Standard.Database.Errors import Unsupported_Database_Operation, Integrity_Error
|
||||
from project.Errors import Unsupported_Database_Operation, Integrity_Error, Unsupported_Name
|
||||
|
||||
import project.Connection.Connection.Connection
|
||||
polyglot java import java.sql.JDBCType
|
||||
@ -427,16 +430,27 @@ type Table
|
||||
on_problems.handle_errors fallback=self.with_no_rows <|
|
||||
mask (make_filter_column column filter)
|
||||
_ : Function -> Error.throw (Unsupported_Database_Operation.Error "Filtering with a custom predicate is not supported in the database.")
|
||||
_ : Text ->
|
||||
_ ->
|
||||
table_at = self.at column
|
||||
if table_at.is_error.not then self.filter table_at filter on_problems else
|
||||
expression = self.evaluate column
|
||||
if expression.is_error.not then self.filter expression filter on_problems else
|
||||
pick_error = expression.catch Expression_Error.Syntax_Error (_->table_at)
|
||||
on_problems.handle_errors pick_error fallback=self
|
||||
_ -> case on_problems.handle_errors (self.at column) fallback=Nothing of
|
||||
Nothing -> self
|
||||
resolved_column -> self.filter resolved_column filter on_problems
|
||||
self.filter table_at filter on_problems
|
||||
|
||||
## ALIAS Filter Rows
|
||||
|
||||
Selects only the rows of this table that correspond to `True` values of
|
||||
`expression`.
|
||||
|
||||
Arguments:
|
||||
- expression: The expression to evaluate to filter the rows.
|
||||
|
||||
> Example
|
||||
Select people celebrating a jubilee.
|
||||
|
||||
people.filter_by_expression "[age] % 10 == 0"
|
||||
filter_by_expression : Text -> Problem_Behavior -> Table
|
||||
filter_by_expression self expression on_problems=Report_Warning =
|
||||
column = self.compute expression
|
||||
if column.value_type != Value_Type.Boolean then Error.throw (Illegal_Argument.Error "Expression must evaluate to a boolean value.") else
|
||||
self.filter column on_problems=on_problems
|
||||
|
||||
## PRIVATE
|
||||
with_no_rows self =
|
||||
@ -511,36 +525,53 @@ type Table
|
||||
Sets the column value at the given name.
|
||||
|
||||
Arguments:
|
||||
- name: The name of the column to set.
|
||||
- column: The new value for the column called `name`.
|
||||
- column: The new column or expression to create column.
|
||||
- new_name: Optional new name for the column.
|
||||
- set_mode: Specifies how to handle the new column.
|
||||
If `Add_Or_Update`, the column will be added if not present, updated
|
||||
if present. If `Add`, the column will be added if not present, error
|
||||
if already present. If `Update`, the column will be updated if
|
||||
present, error if missing.
|
||||
- on_problems: Specifies how to handle problems with expression
|
||||
evaluation.
|
||||
set : Text -> Column | Text -> Problem_Behavior -> Table ! Unsupported_Name | Existing_Column | Missing_Column
|
||||
set self column new_name=Nothing set_mode=Set_Mode.Add_Or_Update on_problems=Report_Warning =
|
||||
resolved = case column of
|
||||
_ : Text -> self.compute column on_problems
|
||||
_ -> column
|
||||
renamed = if new_name.is_nothing then resolved else resolved.rename new_name
|
||||
|
||||
If a column with the given name already exists, it will be replaced.
|
||||
Otherwise a new column is added.
|
||||
set : Text -> Column | Text -> Problem_Behavior -> Table
|
||||
set self name column on_problems=Report_Warning = on_problems.handle_errors fallback=self <|
|
||||
case Helpers.ensure_name_is_sane name of
|
||||
True ->
|
||||
resolved = case column of
|
||||
_ : Text -> self.evaluate column
|
||||
_ -> column
|
||||
new_col = Internal_Column.Value name resolved.sql_type resolved.expression
|
||||
replace = self.internal_columns.any (c -> c.name == name)
|
||||
case replace of
|
||||
True ->
|
||||
new_cols = self.internal_columns.map (c -> if c.name == name then new_col else c)
|
||||
self.updated_columns new_cols
|
||||
False ->
|
||||
self.updated_columns (self.internal_columns + [new_col])
|
||||
Helpers.ensure_name_is_sane renamed.name <|
|
||||
index = self.internal_columns.index_of (c -> c.name == renamed.name)
|
||||
to_add = case set_mode of
|
||||
Set_Mode.Add_Or_Update -> True
|
||||
Set_Mode.Add -> if index.is_nothing then True else Error.throw (Existing_Column.Error renamed.name)
|
||||
Set_Mode.Update -> if index.is_nothing then Error.throw (Missing_Column.Error renamed.name) else True
|
||||
if to_add then
|
||||
new_col = renamed.as_internal
|
||||
new_cols = if index.is_nothing then self.internal_columns + [new_col] else
|
||||
Vector.new self.column_count i-> if i == index then new_col else self.internal_columns.at i
|
||||
self.updated_columns new_cols
|
||||
|
||||
## PRIVATE
|
||||
evaluate : Text -> Column
|
||||
evaluate self expression =
|
||||
## Given an expression, create a derived column where each value is the
|
||||
result of evaluating the expression for the row.
|
||||
|
||||
Arguments:
|
||||
- expression: The expression to evaluate.
|
||||
- on_problems: Specifies how to handle problems with expression
|
||||
evaluation.
|
||||
compute : Text -> Problem_Behavior -> Column
|
||||
compute self expression on_problems=Report_Warning =
|
||||
get_column name = self.at name
|
||||
make_constant value =
|
||||
new_type = SQL_Type.approximate_type value
|
||||
other = SQL_Expression.Constant new_type value
|
||||
Column.Value ("Constant_" + UUID.randomUUID.to_text) self.connection new_type other self.context
|
||||
Expression.evaluate expression get_column make_constant "Standard.Database.Data.Column" "Column" Column.var_args_functions
|
||||
new_column = Expression.evaluate expression get_column make_constant "Standard.Database.Data.Column" "Column" Column.var_args_functions
|
||||
if new_column.is_error then new_column else
|
||||
warnings = Warning.get_all new_column
|
||||
rename = new_column.rename (expression.replace "[^A-Za-z_0-9]" "_" matcher=Regex_Matcher.Value)
|
||||
on_problems.attach_problems_after (Warning.set rename []) warnings
|
||||
|
||||
## UNSTABLE
|
||||
|
||||
@ -725,6 +756,7 @@ type Table
|
||||
allows to join the two tables on equality of corresponding columns with
|
||||
the same name. So `table.join other on=["A", "B"]` is a shorthand for:
|
||||
table.join other on=[Join_Condition.Equals "A" "A", Join_Condition.Equals "B" "B"]
|
||||
@join_kind (Single_Choice ["Join_Kind.Inner", "Join_Kind.Left_Outer", "Join_Kind.Right_Outer", "Join_Kind.Full", "Join_Kind.Left_Exclusive", "Join_Kind.Right_Exclusive"]])
|
||||
join : Table -> Join_Kind -> Join_Condition | Text | Vector (Join_Condition | Text) -> Text -> Problem_Behavior -> Table
|
||||
join self right join_kind=Join_Kind.Inner on=[Join_Condition.Equals 0 0] right_prefix="Right_" on_problems=Report_Warning =
|
||||
can_proceed = if Table_Helpers.is_table right . not then Error.throw (Type_Error.Error Table right "right") else
|
||||
|
@ -55,17 +55,17 @@ unify_vector_singleton x = case x of
|
||||
|
||||
Arguments:
|
||||
- name: The name to check for safety.
|
||||
- action: The action to perform if the name is safe.
|
||||
|
||||
In a future version we will decouple the internal SQL-safe names from the
|
||||
external names shown to the user, but as a temporary solution we only allow
|
||||
SQL-safe names for columns.
|
||||
|
||||
# TODO [RW] better name handling in Tables (#1513)
|
||||
ensure_name_is_sane : Text -> Boolean ! Unsupported_Name
|
||||
ensure_name_is_sane name =
|
||||
is_safe =
|
||||
Pattern.matches "[A-Za-z_0-9]+" name
|
||||
if is_safe then True else
|
||||
ensure_name_is_sane : Text -> (Any -> Any) -> Any ! Unsupported_Name
|
||||
ensure_name_is_sane name ~action =
|
||||
is_safe = Pattern.matches "[A-Za-z_0-9]+" name
|
||||
if is_safe then action else
|
||||
Error.throw <| Unsupported_Name.Error (name + " is not a valid name for a column. Please use english letters, numbers and underscore only.")
|
||||
|
||||
## PRIVATE
|
||||
|
@ -1,6 +1,9 @@
|
||||
from Standard.Base import all
|
||||
import Standard.Base.Error.Illegal_State.Illegal_State
|
||||
|
||||
from Standard.Base.Metadata.Widget import Single_Choice
|
||||
import Standard.Base.Metadata.Display
|
||||
|
||||
import Standard.Table.Data.Table.Table as Materialized_Table
|
||||
|
||||
import project.Connection.Connection.Connection
|
||||
@ -48,6 +51,7 @@ type Postgres_Connection
|
||||
|
||||
Arguments:
|
||||
- database: The name of the database to connect to.
|
||||
@database (self-> Single_Choice display=Display.Always values=(self.databases . map .pretty))
|
||||
set_database : Text -> Connection ! SQL_Error
|
||||
set_database self database =
|
||||
if database == self.database then self else
|
||||
@ -65,6 +69,7 @@ type Postgres_Connection
|
||||
|
||||
Arguments:
|
||||
- schema: The name of the schema to connect to.
|
||||
@schema (self-> Single_Choice display=Display.Always values=(self.schemas . map .pretty))
|
||||
set_schema : Text -> Connection ! SQL_Error
|
||||
set_schema self schema =
|
||||
if schema == self.schema then self else
|
||||
@ -82,6 +87,7 @@ type Postgres_Connection
|
||||
- schema: The schema name to search in (defaults to current schema).
|
||||
- types: The table types to search for. The list of values can be obtained using the `table_types` method.
|
||||
- all_fields: Return all the fields in the metadata table.
|
||||
@types (self-> Single_Choice values=(self.table_types.map .pretty))
|
||||
tables : Text -> Text -> Text -> Vector -> Boolean -> Materialized_Table
|
||||
tables self name_like=Nothing database=self.database schema=self.schema types=Nothing all_fields=False =
|
||||
self.connection.tables name_like database schema types all_fields
|
||||
@ -92,6 +98,7 @@ type Postgres_Connection
|
||||
- query: name of the table or sql statement to query.
|
||||
If supplied as `Text`, the name is checked against the `tables` list to determine if it is a table or a query.
|
||||
- alias: optionally specify a friendly alias for the query.
|
||||
@query (self-> Single_Choice display=Display.Always values=(self.tables.at "Name" . to_vector . map .pretty))
|
||||
query : Text | SQL_Query -> Text -> Database_Table
|
||||
query self query alias="" = self.connection.query query alias
|
||||
|
||||
@ -101,6 +108,7 @@ type Postgres_Connection
|
||||
- query: name of the table or sql statement to query.
|
||||
If supplied as `Text`, the name is checked against the `tables` list to determine if it is a table or a query.
|
||||
- limit: the maximum number of rows to return.
|
||||
@query (self-> Single_Choice display=Display.Always values=(self.tables.at "Name" . to_vector . map .pretty))
|
||||
read : Text | SQL_Query -> Integer | Nothing -> Materialized_Table
|
||||
read self query limit=Nothing = self.connection.read query limit
|
||||
|
||||
|
@ -94,7 +94,7 @@ type Postgres_Dialect
|
||||
## PRIVATE
|
||||
make_internal_generator_dialect =
|
||||
cases = [["LOWER", Base_Generator.make_function "LOWER"], ["UPPER", Base_Generator.make_function "UPPER"]]
|
||||
text = [starts_with, contains, ends_with, agg_shortest, agg_longest]+concat_ops+cases
|
||||
text = [starts_with, contains, ends_with, agg_shortest, agg_longest, make_case_sensitive]+concat_ops+cases
|
||||
counts = [agg_count_is_null, agg_count_empty, agg_count_not_empty, ["COUNT_DISTINCT", agg_count_distinct], ["COUNT_DISTINCT_INCLUDE_NULL", agg_count_distinct_include_null]]
|
||||
arith_extensions = [is_nan, decimal_div]
|
||||
bool = [bool_or]
|
||||
@ -242,7 +242,7 @@ agg_count_distinct_include_null args =
|
||||
|
||||
## PRIVATE
|
||||
starts_with = Base_Generator.lift_binary_op "starts_with" str-> sub->
|
||||
res = str ++ " LIKE CONCAT(" ++ sub ++ ", '%')"
|
||||
res = code "starts_with(" ++ str ++ "," ++ sub ++ ")"
|
||||
res.paren
|
||||
|
||||
## PRIVATE
|
||||
@ -252,7 +252,7 @@ ends_with = Base_Generator.lift_binary_op "ends_with" str-> sub->
|
||||
|
||||
## PRIVATE
|
||||
make_case_sensitive = Base_Generator.lift_unary_op "MAKE_CASE_SENSITIVE" arg->
|
||||
code "((" ++ arg ++ ') COLLATE "C.utf8")'
|
||||
code "((" ++ arg ++ ') COLLATE "ucs_basic")'
|
||||
|
||||
## PRIVATE
|
||||
make_contains_expr expr substring =
|
||||
|
@ -44,6 +44,7 @@ type SQLite_Connection
|
||||
|
||||
Arguments:
|
||||
- database: The name of the database to connect to.
|
||||
@database (Single_Choice display=Display.Always values=['Nothing'])
|
||||
set_database : Text -> Connection ! SQL_Error
|
||||
set_database self database =
|
||||
if database == self.database then self else
|
||||
@ -61,6 +62,7 @@ type SQLite_Connection
|
||||
|
||||
Arguments:
|
||||
- schema: The name of the schema to connect to.
|
||||
@schema (Single_Choice display=Display.Always values=['Nothing'])
|
||||
set_schema : Text -> Connection ! SQL_Error
|
||||
set_schema self schema =
|
||||
if schema == self.schema then self else
|
||||
|
@ -112,7 +112,7 @@ type SQLite_Dialect
|
||||
|
||||
## PRIVATE
|
||||
make_internal_generator_dialect =
|
||||
text = [starts_with, contains, ends_with, fold_case, make_case_sensitive]+concat_ops
|
||||
text = [starts_with, contains, ends_with, make_case_sensitive]+concat_ops
|
||||
counts = [agg_count_is_null, agg_count_empty, agg_count_not_empty, ["COUNT_DISTINCT", agg_count_distinct], ["COUNT_DISTINCT_INCLUDE_NULL", agg_count_distinct_include_null]]
|
||||
stats = [agg_stddev_pop, agg_stddev_samp]
|
||||
arith_extensions = [decimal_div]
|
||||
@ -227,18 +227,14 @@ agg_count_distinct_include_null args = case args.length == 1 of
|
||||
|
||||
## PRIVATE
|
||||
starts_with = Base_Generator.lift_binary_op "starts_with" str-> sub->
|
||||
res = str ++ " LIKE (" ++ sub ++ " || '%')"
|
||||
res = str ++ " GLOB (" ++ sub ++ " || '*')"
|
||||
res.paren
|
||||
|
||||
## PRIVATE
|
||||
ends_with = Base_Generator.lift_binary_op "ends_with" str-> sub->
|
||||
res = str ++ " LIKE ('%' || " ++ sub ++ ")"
|
||||
res = str ++ " GLOB ('*' || " ++ sub ++ ")"
|
||||
res.paren
|
||||
|
||||
## PRIVATE
|
||||
fold_case = Base_Generator.lift_unary_op "FOLD_CASE" arg->
|
||||
code "((" ++ arg ++ ") COLLATE NOCASE)"
|
||||
|
||||
## PRIVATE
|
||||
make_case_sensitive = Base_Generator.lift_unary_op "MAKE_CASE_SENSITIVE" arg->
|
||||
code "((" ++ arg ++ ") COLLATE BINARY)"
|
||||
|
@ -7,11 +7,17 @@ import Standard.Base.Error.Illegal_State.Illegal_State
|
||||
|
||||
import Standard.Base.Data.Index_Sub_Range as Index_Sub_Range_Module
|
||||
|
||||
import project.Data.Table.Table
|
||||
import project.Data.Storage.Storage
|
||||
import project.Data.Value_Type.Value_Type
|
||||
from project.Data.Table import print_table
|
||||
from Standard.Base.Metadata.Widget import Single_Choice
|
||||
import Standard.Base.Metadata.Display
|
||||
|
||||
import project.Data.Column_Type_Selection.Auto
|
||||
import project.Data.Data_Formatter.Data_Formatter
|
||||
import project.Data.Storage.Storage
|
||||
import project.Data.Table.Table
|
||||
import project.Data.Value_Type.Value_Type
|
||||
import project.Internal.Parse_Values_Helper
|
||||
|
||||
from project.Data.Table import print_table
|
||||
from project.Errors import No_Index_Set_Error
|
||||
|
||||
polyglot java import org.enso.table.data.column.storage.Storage as Java_Storage
|
||||
@ -715,6 +721,8 @@ type Column
|
||||
- other: The value to compare `self` with. If `other` is a column, the
|
||||
operation is performed pairwise between corresponding elements of
|
||||
`self` and `other`.
|
||||
- case_sensitivity: Specifies if the text values should be compared case
|
||||
sensitively.
|
||||
|
||||
> Example
|
||||
Check the elements of a column for starting with the elements of
|
||||
@ -731,9 +739,16 @@ type Column
|
||||
import Standard.Examples
|
||||
|
||||
example_starts_with = Examples.text_column_1.starts_with "hell"
|
||||
starts_with : Column | Text -> Column
|
||||
starts_with self other =
|
||||
run_vectorized_binary_op self "starts_with" (a -> b -> a.starts_with b) other
|
||||
|
||||
> Example
|
||||
Check the elements of a column for starting with a value comparing case insensitively.
|
||||
|
||||
import Standard.Examples
|
||||
|
||||
example_starts_with = Examples.text_column_1.starts_with "hell" Case_Sensitivity.Insensitive
|
||||
starts_with : Column | Text -> Case_Sensitivity -> Column
|
||||
starts_with self other case_sensitivity=Case_Sensitivity.Default =
|
||||
run_vectorized_binary_case_text_op self Java_Storage.Maps.STARTS_WITH other case_sensitivity (a -> b -> a.starts_with b case_sensitivity)
|
||||
|
||||
## Checks for each element of the column if it ends with `other`.
|
||||
|
||||
@ -741,6 +756,8 @@ type Column
|
||||
- other: The value to compare `self` with. If `other` is a column, the
|
||||
operation is performed pairwise between corresponding elements of
|
||||
`self` and `other`.
|
||||
- case_sensitivity: Specifies if the text values should be compared case
|
||||
sensitively.
|
||||
|
||||
> Example
|
||||
Check the elements of a column for ending with the elements of another
|
||||
@ -757,9 +774,9 @@ type Column
|
||||
import Standard.Examples
|
||||
|
||||
example_ends_with = Examples.text_column_1.ends_with "hell"
|
||||
ends_with : Column | Text -> Column
|
||||
ends_with self other =
|
||||
run_vectorized_binary_op self "ends_with" (a -> b -> a.ends_with b) other
|
||||
ends_with : Column | Text -> Case_Sensitivity -> Column
|
||||
ends_with self other case_sensitivity=Case_Sensitivity.Default =
|
||||
run_vectorized_binary_case_text_op self Java_Storage.Maps.ENDS_WITH other case_sensitivity (a -> b -> a.ends_with b case_sensitivity)
|
||||
|
||||
## Checks for each element of the column if it contains `other`.
|
||||
|
||||
@ -767,6 +784,8 @@ type Column
|
||||
- other: The value to compare `self` with. If `other` is a column, the
|
||||
operation is performed pairwise between corresponding elements of
|
||||
`self` and `other`.
|
||||
- case_sensitivity: Specifies if the text values should be compared case
|
||||
sensitively.
|
||||
|
||||
> Example
|
||||
Check the elements of a column for containing the elements of another
|
||||
@ -784,8 +803,8 @@ type Column
|
||||
|
||||
example_contains = Examples.text_column_1.contains "hell"
|
||||
contains : Column | Text -> Column
|
||||
contains self other =
|
||||
run_vectorized_binary_op self "contains" (a -> b -> a.contains b) other
|
||||
contains self other case_sensitivity=Case_Sensitivity.Default =
|
||||
run_vectorized_binary_case_text_op self Java_Storage.Maps.CONTAINS other case_sensitivity (a -> b -> a.contains b case_sensitivity)
|
||||
|
||||
## Checks for each element of the column if it matches an SQL-like pattern.
|
||||
|
||||
@ -849,6 +868,66 @@ type Column
|
||||
new_vector = self.to_vector.map (Filter_Condition.Is_In true_vector).to_predicate
|
||||
Column.from_vector result_name new_vector
|
||||
|
||||
## Parses a text column into values of the requested type.
|
||||
|
||||
Arguments:
|
||||
- type: Target type to parse the value to.
|
||||
If `Auto`, will attempt to deduce the type automatically.
|
||||
- format: For date/time types, the format to parse the value with.
|
||||
For boolean types, two values that represent true and false separated
|
||||
by `|`.
|
||||
- on_problems: What to do if the value cannot be parsed.
|
||||
By default, an `Invalid_Format` warning is attached and the value is
|
||||
set to `Nothing`.
|
||||
|
||||
> Example
|
||||
Parse dates in a column in the format `yyyy-MM-dd` (the default format).
|
||||
|
||||
import Standard.Examples
|
||||
|
||||
example_parse = Examples.text_column_1.parse Date
|
||||
|
||||
> Example
|
||||
Parse dates in a column in the format `dd/MM/yyyy`
|
||||
|
||||
import Standard.Examples
|
||||
|
||||
example_parse = Examples.text_column_1.parse Date 'dd/MM/yyyy'
|
||||
|
||||
> Example
|
||||
Parse a Yes/No column into a boolean column.
|
||||
|
||||
import Standard.Examples
|
||||
|
||||
example_parse = Examples.text_column_1.parse Boolean 'Yes|No'
|
||||
@type (Single_Choice ['Auto','Integer','Decimal','Date','Date_Time','Time_Of_Day','Boolean'] display=Display.Always)
|
||||
parse : (Auto|Integer|Decimal|Date|Date_Time|Time_Of_Day|Boolean) -> Text | Nothing -> Problem_Behavior -> Column
parse self type=Auto format=Nothing on_problems=Report_Warning =
    # The target types this method accepts; any other `type` is rejected.
    supported_types = [Auto, Integer, Decimal, Date, Date_Time, Time_Of_Day, Boolean]
    # Runs the (lazy) continuation only when `type` is one of the supported
    # target types, otherwise raises an `Illegal_Argument` error.
    when_supported ~continuation =
        if supported_types.index_of type == Nothing then Error.throw (Illegal_Argument.Error "Unsupported target type "+type.to_text+".") else continuation

    Value_Type.expect_text self.value_type <| when_supported <|
        # With no `format` the default formatter is used. Otherwise the format
        # customises the formatter for the requested type; types that take no
        # format (Auto, Integer, Decimal) raise an error instead.
        formatter = case format.is_nothing of
            True -> Data_Formatter.Value
            False -> case type of
                Auto -> Error.throw (Illegal_Argument.Error "Cannot specify a `format` with type `Auto`.")
                Integer -> Error.throw (Illegal_Argument.Error "Cannot specify a `format` with type `Integer`.")
                Decimal -> Error.throw (Illegal_Argument.Error "Cannot specify a `format` with type `Decimal`.")
                Date -> Data_Formatter.Value.with_datetime_formats date_formats=[format]
                Date_Time -> Data_Formatter.Value.with_datetime_formats datetime_formats=[format]
                Time_Of_Day -> Data_Formatter.Value.with_datetime_formats time_formats=[format]
                Boolean ->
                    # A boolean format is `<true text>|<false text>`.
                    parts = format.split "|"
                    if parts.length != 2 then Error.throw (Illegal_Argument.Error "The `format` for Booleans must be a string with two values separated by `|`, for example: 'Yes|No'.") else
                        Data_Formatter.Value.with_boolean_values true_values=[parts.at 0] false_values=[parts.at 1]

        parser = case type == Auto of
            True -> formatter.make_auto_parser
            False -> formatter.make_datatype_parser type
        source_storage = self.java_column.getStorage
        parsed = parser.parseColumn self.name source_storage

        parsed_storage = parsed.value
        # Convert the problems reported by the Java parser into Enso problems.
        translated_problems = Vector.from_polyglot_array parsed.problems . map (Parse_Values_Helper.translate_parsing_problem type)

        result = Column.Value (Java_Column.new self.name parsed_storage)
        on_problems.attach_problems_after result translated_problems
|
||||
|
||||
## ALIAS Transform Column
|
||||
|
||||
Applies `function` to each item in this column and returns the column
|
||||
@ -1361,3 +1440,18 @@ make_storage_builder_for_type value_type initial_size=128 =
|
||||
Value_Type.Mixed -> Storage.Any
|
||||
_ -> Storage.Any
|
||||
Storage.make_builder closest_storage_type initial_size
|
||||
|
||||
## PRIVATE
|
||||
Helper for text operations that depend on `case_sensitivity`.
|
||||
run_vectorized_binary_case_text_op : Column -> Text -> (Text | Column) -> Case_Sensitivity -> (Text -> Text -> Any) -> Column
run_vectorized_binary_case_text_op left op other case_sensitivity fallback =
    Value_Type.expect_text left.value_type <|
        # Choose which vectorized operation to request: the given `op` for the
        # default and case-sensitive modes, or `Nothing` for the
        # case-insensitive mode.
        vectorized_op = case case_sensitivity of
            Case_Sensitivity.Default -> op
            Case_Sensitivity.Sensitive -> op
            Case_Sensitivity.Insensitive _ ->
                ## TODO currently this always runs the fallback which is slow due to the
                   cost of Java-to-Enso calls. We want to have a vectorized
                   implementation, but we need to extend the architecture to allow
                   passing the locale to it.
                   See: https://www.pivotaltracker.com/n/projects/2539304/stories/184093260
                Nothing
        run_vectorized_binary_op left vectorized_op fallback other
|
||||
|
@ -0,0 +1,11 @@
|
||||
## Selects whether `Table.set` may add a new column, update an existing one,
## or do either.
type Set_Mode
|
||||
## Add a new column to the table.
|
||||
If already present an `Existing_Column` error is returned.
|
||||
Add
|
||||
|
||||
## Update an existing column in the table.
|
||||
If not present a `Missing_Column` error is returned.
|
||||
Update
|
||||
|
||||
## Add the column to the table if not present, or update it if present.
|
||||
Add_Or_Update
|
@ -13,6 +13,7 @@ import Standard.Base.Error.Unimplemented.Unimplemented
|
||||
from Standard.Base.Metadata.Widget import Single_Choice
|
||||
import Standard.Base.Metadata.Display
|
||||
|
||||
import project.Data.Aggregate_Column.Aggregate_Column
|
||||
import project.Data.Column.Column
|
||||
import project.Data.Column as Column_Module
|
||||
import project.Data.Column_Name_Mapping.Column_Name_Mapping
|
||||
@ -25,11 +26,10 @@ import project.Data.Match_Columns as Match_Columns_Helpers
|
||||
import project.Data.Position.Position
|
||||
import project.Data.Report_Unmatched.Report_Unmatched
|
||||
import project.Data.Row.Row
|
||||
import project.Data.Set_Mode.Set_Mode
|
||||
import project.Data.Sort_Column.Sort_Column
|
||||
import project.Data.Storage.Storage
|
||||
import project.Data.Value_Type.Value_Type
|
||||
import project.Data.Sort_Column.Sort_Column
|
||||
import project.Data.Aggregate_Column.Aggregate_Column
|
||||
import project.Data.Storage.Storage
|
||||
import project.Internal.Aggregate_Column_Helper
|
||||
import project.Internal.Java_Problems
|
||||
import project.Internal.Join_Helpers
|
||||
@ -44,7 +44,7 @@ import project.Delimited.Delimited_Format.Delimited_Format
|
||||
|
||||
from project.Data.Column_Type_Selection import Column_Type_Selection, Auto
|
||||
from project.Internal.Rows_View import Rows_View
|
||||
from project.Errors import Column_Count_Mismatch, Missing_Input_Columns, Column_Indexes_Out_Of_Range, Duplicate_Type_Selector, No_Index_Set_Error, No_Such_Column, No_Input_Columns_Selected, No_Output_Columns, Invalid_Value_Type, Cross_Join_Row_Limit_Exceeded, Row_Count_Mismatch
|
||||
from project.Errors import Column_Count_Mismatch, Missing_Input_Columns, Column_Indexes_Out_Of_Range, Duplicate_Type_Selector, No_Index_Set_Error, No_Such_Column, No_Input_Columns_Selected, No_Output_Columns, Invalid_Value_Type, Cross_Join_Row_Limit_Exceeded, Row_Count_Mismatch, Existing_Column, Missing_Column
|
||||
|
||||
from project.Data.Column import get_item_string
|
||||
from project.Internal.Filter_Condition_Helpers import make_filter_column
|
||||
@ -797,8 +797,8 @@ type Table
|
||||
`filter`.
|
||||
|
||||
Arguments:
|
||||
- column: The column to use for filtering. Can be a column name, index or
|
||||
the `Column` object itself.
|
||||
- column: The column to use for filtering.
|
||||
Can be a column name, index or the `Column` object itself.
|
||||
- filter: The filter to apply to the column. It can either be an instance
|
||||
of `Filter_Condition` or a predicate taking a cell value and returning
|
||||
a boolean value indicating whether the corresponding row should be kept
|
||||
@ -835,6 +835,8 @@ type Table
|
||||
|
||||
people.filter "age" (age -> (age%10 == 0))
|
||||
@column (self-> Single_Choice display=Display.Always values=(self.column_names.map .pretty))
|
||||
@filter Filter_Condition.widget_for_filter_condition
|
||||
@on_problems (Single_Choice ["Report_Error", "Report_Warning", "Ignore"] display=Display.Expanded_Only)
|
||||
filter : (Column | Text | Integer) -> (Filter_Condition|(Any->Boolean)) -> Problem_Behavior -> Table
|
||||
filter self column filter=(Filter_Condition.Is_True) on_problems=Report_Warning = case column of
|
||||
_ : Column ->
|
||||
@ -844,16 +846,27 @@ type Table
|
||||
on_problems.handle_errors fallback=self.with_no_rows <|
|
||||
mask (make_filter_column column filter)
|
||||
_ : Function -> mask (column.map filter)
|
||||
_ : Text ->
|
||||
_ ->
|
||||
table_at = self.at column
|
||||
if table_at.is_error.not then self.filter table_at filter on_problems else
|
||||
expression = self.evaluate column
|
||||
if expression.is_error.not then self.filter expression filter on_problems else
|
||||
pick_error = expression.catch Expression_Error.Syntax_Error (_->table_at)
|
||||
on_problems.handle_errors pick_error fallback=self
|
||||
_ -> case on_problems.handle_errors (self.at column) fallback=Nothing of
|
||||
Nothing -> self
|
||||
resolved_column -> self.filter resolved_column filter on_problems
|
||||
self.filter table_at filter on_problems
|
||||
|
||||
## ALIAS Filter Rows
|
||||
|
||||
Selects only the rows of this table that correspond to `True` values of
|
||||
`filter`.
|
||||
|
||||
Arguments:
|
||||
- expression: The expression to evaluate to filter the rows.
|
||||
|
||||
> Example
|
||||
Select people celebrating a jubilee.
|
||||
|
||||
people.filter_by_expression "[age] % 10 == 0"
|
||||
filter_by_expression : Text -> Problem_Behavior -> Table
filter_by_expression self expression on_problems=Report_Warning =
    # Evaluate the expression into a derived column, forwarding `on_problems`.
    mask_column = self.compute expression on_problems=on_problems
    is_boolean = mask_column.value_type == Value_Type.Boolean
    # Only a boolean column can be used to select rows; anything else is an
    # argument error.
    if is_boolean then self.filter mask_column on_problems=on_problems else
        Error.throw (Illegal_Argument.Error "Expression must evaluate to a boolean value.")
|
||||
|
||||
## PRIVATE
|
||||
with_no_rows self = self.take (First 0)
|
||||
@ -904,17 +917,18 @@ type Table
|
||||
|
||||
## ALIAS Add Column, Update Column
|
||||
|
||||
Sets the column value at the given name.
|
||||
Adds or updates the column in the table.
|
||||
|
||||
Arguments:
|
||||
- name: The name of the column to set the value of.
|
||||
- column: The new value for the column either a `Column` or `Text` of an
|
||||
expression.
|
||||
- on_problems: Specifies how to handle if a problem occurs, raising as a
|
||||
warning by default.
|
||||
|
||||
If a column with the given name already exists, it will be replaced.
|
||||
Otherwise a new column is added.
|
||||
- column: The new column or expression to create column.
|
||||
- new_name: Optional new name for the column.
|
||||
- set_mode: Specifies how to handle the new column.
|
||||
If `Add_Or_Update`, the column will be added if not present, updated
|
||||
if present. If `Add`, the column will be added if not present, error
|
||||
if already present. If `Update`, the column will be updated if
|
||||
present, error if missing.
|
||||
- on_problems: Specifies how to handle problems with expression
|
||||
evaluation.
|
||||
|
||||
> Example
|
||||
Create a table where the values of the total stock in the inventory is
|
||||
@ -925,24 +939,39 @@ type Table
|
||||
example_set =
|
||||
table = Examples.inventory_table
|
||||
double_inventory = table.at "total_stock" * 2
|
||||
table.set "total_stock" double_inventory
|
||||
table.set "total_stock_expr" "2 * [total_stock]"
|
||||
set : Text -> Column | Vector | Text -> Problem_Behavior -> Table
|
||||
set self name column on_problems=Report_Warning = case column of
|
||||
_ : Text ->
|
||||
expression = self.evaluate column
|
||||
if expression.is_error.not then self.set name expression on_problems else
|
||||
on_problems.handle_errors expression fallback=self
|
||||
_ : Vector -> self.set name (Column.from_vector name column)
|
||||
_ : Column ->
|
||||
Table.Value (self.java_table.addOrReplaceColumn (column.rename name . java_column))
|
||||
table.set double_inventory new_name="total_stock"
|
||||
table.set "2 * [total_stock]" new_name="total_stock_expr"
|
||||
set : Text -> Column | Text -> Problem_Behavior -> Table ! Existing_Column | Missing_Column
|
||||
set self column new_name=Nothing set_mode=Set_Mode.Add_Or_Update on_problems=Report_Warning =
|
||||
resolved = case column of
|
||||
_ : Text -> self.compute column on_problems
|
||||
_ : Column -> column
|
||||
renamed = if new_name.is_nothing then resolved else resolved.rename new_name
|
||||
to_add = case set_mode of
|
||||
Set_Mode.Add_Or_Update -> True
|
||||
Set_Mode.Add -> if self.java_table.getColumnByName renamed.name . is_nothing then True else
|
||||
Error.throw (Existing_Column.Error renamed.name)
|
||||
Set_Mode.Update -> if self.java_table.getColumnByName renamed.name . is_nothing . not then True else
|
||||
Error.throw (Missing_Column.Error renamed.name)
|
||||
|
||||
## PRIVATE
|
||||
evaluate : Text -> Column
|
||||
evaluate self expression =
|
||||
if to_add then Table.Value (self.java_table.addOrReplaceColumn renamed.java_column) else to_add
|
||||
|
||||
## Given an expression, create a derived column where each value is the
|
||||
result of evaluating the expression for the row.
|
||||
|
||||
Arguments:
|
||||
- expression: The expression to evaluate.
|
||||
- on_problems: Specifies how to handle problems with expression
|
||||
evaluation.
|
||||
compute : Text -> Problem_Behavior -> Column
|
||||
compute self expression on_problems=Report_Warning =
|
||||
get_column name = self.at name
|
||||
make_constant value = Column.from_vector_repeated (UUID.randomUUID.to_text) [value] self.row_count
|
||||
Expression.evaluate expression get_column make_constant "Standard.Table.Data.Column" "Column" Column.var_args_functions
|
||||
new_column = Expression.evaluate expression get_column make_constant "Standard.Table.Data.Column" "Column" Column.var_args_functions
|
||||
if new_column.is_error then new_column else
|
||||
warnings = Warning.get_all new_column
|
||||
rename = new_column.rename expression
|
||||
on_problems.attach_problems_after (Warning.set rename []) warnings
|
||||
|
||||
## Returns the vector of columns contained in this table.
|
||||
|
||||
@ -1037,6 +1066,7 @@ type Table
|
||||
allows to join the two tables on equality of corresponding columns with
|
||||
the same name. So `table.join other on=["A", "B"]` is a shorthand for:
|
||||
table.join other on=[Join_Condition.Equals "A" "A", Join_Condition.Equals "B" "B"]
|
||||
@join_kind (Single_Choice ["Join_Kind.Inner", "Join_Kind.Left_Outer", "Join_Kind.Right_Outer", "Join_Kind.Full", "Join_Kind.Left_Exclusive", "Join_Kind.Right_Exclusive"]])
|
||||
join : Table -> Join_Kind -> Join_Condition | Text | Vector (Join_Condition | Text) -> Text -> Problem_Behavior -> Table
|
||||
join self right join_kind=Join_Kind.Inner on=[Join_Condition.Equals 0 0] right_prefix="Right_" on_problems=Report_Warning =
|
||||
if check_table "right" right then
|
||||
|
@ -65,7 +65,7 @@ type Delimited_Format
|
||||
for_web content_type _ =
|
||||
parts = content_type.split ";" . map .trim
|
||||
|
||||
charset_part = parts.find if_missing=Nothing (x->x.starts_with "charset=")
|
||||
charset_part = parts.find if_missing=Nothing (x-> x.starts_with "charset=")
|
||||
encoding = if charset_part.if_nothing then Encoding.utf_8 else
|
||||
parsed = Encoding.from_name (charset_part.drop 8)
|
||||
if parsed.is_error then Encoding.utf_8 else parsed
|
||||
|
@ -226,6 +226,20 @@ type Duplicate_Type_Selector
|
||||
to_display_text : Text
|
||||
to_display_text self = "Duplicate type selector for column " + self.column + "."
|
||||
|
||||
## Indicates that the column was already present in the table.
|
||||
type Existing_Column
|
||||
# column_name: the name of the column that already exists in the table.
Error column_name
|
||||
|
||||
# Builds a human-readable message naming the clashing column.
to_display_text : Text
|
||||
to_display_text self = "The column '" + self.column_name + "' already exists, but `Set_Mode.Add` was selected."
|
||||
|
||||
## Indicates that the column was not present in the table.
|
||||
type Missing_Column
|
||||
# column_name: the name of the column that was not found in the table.
Error column_name
|
||||
|
||||
# Builds a human-readable message naming the missing column.
to_display_text : Text
|
||||
to_display_text self = "The column '" + self.column_name + "' was not found, but `Set_Mode.Update` was selected."
|
||||
|
||||
## Indicates that the target range contains existing data and the user did not
|
||||
specify to overwrite.
|
||||
type Existing_Data
|
||||
|
@ -32,22 +32,22 @@ make_filter_column source_column filter_condition = case filter_condition of
|
||||
Greater value -> (source_column > value)
|
||||
Between lower upper -> source_column.between lower upper
|
||||
# Text
|
||||
Starts_With prefix ->
|
||||
Starts_With prefix case_sensitivity ->
|
||||
Value_Type.expect_text source_column.value_type <|
|
||||
expect_column_or_value_as_text "prefix" prefix <|
|
||||
source_column.starts_with prefix
|
||||
Ends_With suffix ->
|
||||
source_column.starts_with prefix case_sensitivity
|
||||
Ends_With suffix case_sensitivity ->
|
||||
Value_Type.expect_text source_column.value_type <|
|
||||
expect_column_or_value_as_text "suffix" suffix <|
|
||||
source_column.ends_with suffix
|
||||
Contains substring ->
|
||||
source_column.ends_with suffix case_sensitivity
|
||||
Contains substring case_sensitivity ->
|
||||
Value_Type.expect_text source_column.value_type <|
|
||||
expect_column_or_value_as_text "substring" substring <|
|
||||
source_column.contains substring
|
||||
Not_Contains substring ->
|
||||
source_column.contains substring case_sensitivity
|
||||
Not_Contains substring case_sensitivity ->
|
||||
Value_Type.expect_text source_column.value_type <|
|
||||
expect_column_or_value_as_text "substring" substring <|
|
||||
source_column.contains substring . not
|
||||
source_column.contains substring case_sensitivity . not
|
||||
Is_Empty ->
|
||||
Value_Type.expect_text source_column.value_type <|
|
||||
source_column.is_empty
|
||||
|
@ -176,7 +176,7 @@ type Table_Column_Helper
|
||||
adding a new column with a clashing name will not affect any
|
||||
other columns computed from the old column with that name.
|
||||
table_with_blank_indicators = blanks.fold self.table table-> blanks_col->
|
||||
table.set blanks_col.name blanks_col
|
||||
table.set blanks_col
|
||||
just_indicators = table_with_blank_indicators.select_columns (blanks.map .name) on_problems=Problem_Behavior.Report_Error
|
||||
# Maximum is equivalent to Exists and Minimum is equivalent to Forall.
|
||||
col_aggregate = if when_any then Maximum _ else Minimum _
|
||||
@ -212,7 +212,7 @@ type Table_Column_Helper
|
||||
matched_columns = self.internal_columns.filter column->(column.name==selector)
|
||||
if matched_columns.length == 1 then matched_columns.first else
|
||||
if matched_columns.length != 0 then Panic.throw (Illegal_State.Error "A single exact match should never match more than one column. Perhaps the table breaks the invariant of unique column names?") else
|
||||
expression = (self.table.evaluate selector).catch Any _->Nothing
|
||||
expression = (self.table.compute selector).catch Any _->Nothing
|
||||
if Nothing != expression then expression else
|
||||
problem_builder.report_missing_input_columns [selector]
|
||||
Nothing
|
||||
|
@ -4,12 +4,15 @@ import project.Data.Aggregate_Column.Aggregate_Column
|
||||
import project.Data.Column.Column
|
||||
import project.Data.Column_Name_Mapping.Column_Name_Mapping
|
||||
import project.Data.Column_Selector.Column_Selector
|
||||
import project.Data.Column_Type_Selection.Auto
|
||||
import project.Data.Column_Type_Selection.Column_Type_Selection
|
||||
import project.Data.Data_Formatter.Data_Formatter
|
||||
import project.Data.Join_Condition.Join_Condition
|
||||
import project.Data.Join_Kind.Join_Kind
|
||||
import project.Data.Match_Columns.Match_Columns
|
||||
import project.Data.Position.Position
|
||||
import project.Data.Report_Unmatched.Report_Unmatched
|
||||
import project.Data.Set_Mode.Set_Mode
|
||||
import project.Data.Sort_Column.Sort_Column
|
||||
import project.Data.Table.Table
|
||||
import project.Data.Table_Conversions
|
||||
@ -23,12 +26,15 @@ export project.Data.Aggregate_Column.Aggregate_Column
|
||||
export project.Data.Column.Column
|
||||
export project.Data.Column_Name_Mapping.Column_Name_Mapping
|
||||
export project.Data.Column_Selector.Column_Selector
|
||||
export project.Data.Column_Type_Selection.Auto
|
||||
export project.Data.Column_Type_Selection.Column_Type_Selection
|
||||
export project.Data.Data_Formatter.Data_Formatter
|
||||
export project.Data.Join_Condition.Join_Condition
|
||||
export project.Data.Join_Kind.Join_Kind
|
||||
export project.Data.Match_Columns.Match_Columns
|
||||
export project.Data.Position.Position
|
||||
export project.Data.Report_Unmatched.Report_Unmatched
|
||||
export project.Data.Set_Mode.Set_Mode
|
||||
export project.Data.Sort_Column.Sort_Column
|
||||
export project.Data.Table.Table
|
||||
export project.Data.Table_Conversions
|
||||
|
@ -3,9 +3,7 @@ package org.enso.interpreter.runtime.data.text;
|
||||
import com.ibm.icu.text.BreakIterator;
|
||||
import com.ibm.icu.text.Normalizer2;
|
||||
import com.oracle.truffle.api.CompilerDirectives;
|
||||
import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary;
|
||||
import com.oracle.truffle.api.dsl.Cached;
|
||||
import com.oracle.truffle.api.utilities.TriState;
|
||||
import com.oracle.truffle.api.interop.InteropLibrary;
|
||||
import com.oracle.truffle.api.interop.TruffleObject;
|
||||
import com.oracle.truffle.api.library.CachedLibrary;
|
||||
@ -197,7 +195,8 @@ public final class Text implements TruffleObject {
|
||||
String str = toJavaStringNode.execute(this);
|
||||
// TODO This should be more extensible
|
||||
String replaced =
|
||||
str.replace("'", "\\'")
|
||||
str.replace("\\", "\\\\")
|
||||
.replace("'", "\\'")
|
||||
.replace("\n", "\\n")
|
||||
.replace("\t", "\\t")
|
||||
.replace("\u0007", "\\a")
|
||||
|
@ -120,18 +120,24 @@ spec setup =
|
||||
((t.at "A").between (t.at "B") (t.at "C")) . to_vector . should_equal r
|
||||
|
||||
Test.group prefix+"Column Operations - Text" <|
|
||||
t3 = table_builder [["s1", ["foobar", "bar", "baz", Nothing]], ["s2", ["foo", "ar", "a", Nothing]]]
|
||||
t3 = table_builder [["s1", ["foobar", "bar", "baz", "BAB", Nothing]], ["s2", ["foo", "ar", "a", "b", Nothing]]]
|
||||
s1 = t3.at "s1"
|
||||
s2 = t3.at "s2"
|
||||
Test.specify "should handle basic Text operations" <|
|
||||
s1.starts_with s2 . to_vector . should_equal [True, False, False, Nothing]
|
||||
s1.starts_with "foo" . to_vector . should_equal [True, False, False, Nothing]
|
||||
s1.starts_with "ba" . to_vector . should_equal [False, True, True, Nothing]
|
||||
s1.starts_with s2 . to_vector . should_equal [True, False, False, False, Nothing]
|
||||
s1.starts_with s2 Case_Sensitivity.Insensitive . to_vector . should_equal [True, False, False, True, Nothing]
|
||||
s1.starts_with "foo" . to_vector . should_equal [True, False, False, False, Nothing]
|
||||
s1.starts_with "ba" . to_vector . should_equal [False, True, True, False, Nothing]
|
||||
s1.starts_with "ba" Case_Sensitivity.Insensitive . to_vector . should_equal [False, True, True, True, Nothing]
|
||||
|
||||
s1.contains s2 . to_vector . should_equal [True, True, True, Nothing]
|
||||
s1.contains "a" . to_vector . should_equal [True, True, True, Nothing]
|
||||
s1.contains "oo" . to_vector . should_equal [True, False, False, Nothing]
|
||||
s1.contains s2 . to_vector . should_equal [True, True, True, False, Nothing]
|
||||
s1.contains s2 Case_Sensitivity.Insensitive . to_vector . should_equal [True, True, True, True, Nothing]
|
||||
s1.contains "a" . to_vector . should_equal [True, True, True, False, Nothing]
|
||||
s1.contains "oo" . to_vector . should_equal [True, False, False, False, Nothing]
|
||||
s1.contains "a" Case_Sensitivity.Insensitive . to_vector . should_equal [True, True, True, True, Nothing]
|
||||
|
||||
s1.ends_with s2 . to_vector . should_equal [False, True, False, Nothing]
|
||||
s1.ends_with "ar" . to_vector . should_equal [True, True, False, Nothing]
|
||||
s1.ends_with "a" . to_vector . should_equal [False, False, False, Nothing]
|
||||
s1.ends_with s2 . to_vector . should_equal [False, True, False, False, Nothing]
|
||||
s1.ends_with s2 Case_Sensitivity.Insensitive . to_vector . should_equal [False, True, False, True, Nothing]
|
||||
s1.ends_with "ar" . to_vector . should_equal [True, True, False, False, Nothing]
|
||||
s1.ends_with "a" . to_vector . should_equal [False, False, False, False, Nothing]
|
||||
s1.ends_with "b" Case_Sensitivity.Insensitive . to_vector . should_equal [False, False, False, True, Nothing]
|
||||
|
@ -2,6 +2,7 @@ from Standard.Base import all
|
||||
import Standard.Base.Error.Common.Index_Out_Of_Bounds
|
||||
import Standard.Base.Error.Illegal_Argument.Illegal_Argument
|
||||
|
||||
from Standard.Table import Set_Mode
|
||||
from Standard.Table.Errors import all
|
||||
|
||||
from Standard.Test import Test
|
||||
@ -14,7 +15,7 @@ main = run_default_backend spec
|
||||
spec setup =
|
||||
prefix = setup.prefix
|
||||
table_builder = setup.table_builder
|
||||
table =
|
||||
table_fn =
|
||||
col1 = ["foo", [1,2,3]]
|
||||
col2 = ["bar", [4,5,6]]
|
||||
col3 = ["Baz", [7,8,9]]
|
||||
@ -23,6 +24,7 @@ spec setup =
|
||||
col6 = ["ab.+123", [16,17,18]]
|
||||
col7 = ["abcd123", [19,20,21]]
|
||||
table_builder [col1, col2, col3, col4, col5, col6, col7]
|
||||
table = table_fn
|
||||
|
||||
Test.group prefix+"Table.at" <|
|
||||
Test.specify "should allow selecting columns by name" <|
|
||||
@ -80,6 +82,42 @@ spec setup =
|
||||
table.get (Pair.new 1 2) . should_fail_with Illegal_Argument.Error
|
||||
table.get (Pair.new 1 2) . catch . to_display_text . should_equal "Illegal Argument: expected 'selector' to be either a Text or an Integer, but got Pair.Value."
|
||||
|
||||
Test.group prefix+"Table.set" <|
|
||||
Test.specify "should allow adding a column" <|
|
||||
bar2 = table.get "bar" . rename "bar2"
|
||||
t2 = table.set bar2
|
||||
t2.column_names . should_equal ["foo", "bar", "Baz", "foo_1", "foo_2", "ab.+123", "abcd123", "bar2"]
|
||||
t2.get "bar2" . to_vector . should_equal [4, 5, 6]
|
||||
|
||||
t3 = t2.set bar2 "bar3"
|
||||
t3.column_names . should_equal ["foo", "bar", "Baz", "foo_1", "foo_2", "ab.+123", "abcd123", "bar2", "bar3"]
|
||||
|
||||
Test.specify "should allow replacing a column" <|
|
||||
foo = table.get "bar" . rename "foo"
|
||||
t2 = table.set foo
|
||||
t2.column_names . should_equal ["foo", "bar", "Baz", "foo_1", "foo_2", "ab.+123", "abcd123"]
|
||||
t2.get "foo" . to_vector . should_equal [4, 5, 6]
|
||||
|
||||
t3 = t2.set foo "bar3"
|
||||
t3.column_names . should_equal ["foo", "bar", "Baz", "foo_1", "foo_2", "ab.+123", "abcd123", "bar3"]
|
||||
|
||||
Test.specify "should allow adding a column" <|
|
||||
bar2 = table.get "bar" . rename "bar2"
|
||||
table.set bar2 set_mode=Set_Mode.Update . should_fail_with Missing_Column.Error
|
||||
|
||||
foo = table.get "bar" . rename "foo"
|
||||
table.set foo set_mode=Set_Mode.Add . should_fail_with Existing_Column.Error
|
||||
|
||||
Test.specify "should not affect existing columns that depended on the old column being replaced" <|
|
||||
t1 = table_builder [["X", [1,2,3]]]
|
||||
t2 = t1.set (t1.at "X" * 100) new_name="Y"
|
||||
t3 = t2.set "[X] + 10" new_name="Z"
|
||||
t4 = t3.set "[X] + 1000" new_name="X"
|
||||
|
||||
t4.at "X" . to_vector . should_equal [1001, 1002, 1003]
|
||||
t4.at "Y" . to_vector . should_equal [100, 200, 300]
|
||||
t4.at "Z" . to_vector . should_equal [11, 12, 13]
|
||||
|
||||
Test.group prefix+"Table.column_names" <|
|
||||
Test.specify "should return the names of all columns" <|
|
||||
table.column_names . should_equal ["foo", "bar", "Baz", "foo_1", "foo_2", "ab.+123", "abcd123"]
|
||||
|
@ -30,7 +30,7 @@ spec detailed setup =
|
||||
epsilon=0.0000000001
|
||||
|
||||
tester expression value =
|
||||
new_table = test_table.set "NEW_COL" expression
|
||||
new_table = test_table.set expression new_name="NEW_COL"
|
||||
new_table.column_count . should_equal (test_table.column_count + 1)
|
||||
new_column = new_table.at -1
|
||||
new_column.name . should_equal "NEW_COL"
|
||||
@ -278,8 +278,7 @@ spec detailed setup =
|
||||
|
||||
Test.group prefix+"Errors should be handled" pending=pending_bug <|
|
||||
error_tester expression fail_type =
|
||||
test_table.set "NEW_COL" expression on_problems=Problem_Behavior.Report_Error . should_fail_with fail_type
|
||||
test_table.set "NEW_COL" expression . column_count . should_equal test_table.column_count
|
||||
test_table.set expression new_name="NEW_COL" . should_fail_with fail_type
|
||||
|
||||
specify_test "should fail with Syntax_Error if badly formed" expression_test=error_tester expression_test->
|
||||
expression_test "IIF [A] THEN 1 ELSE 2" Expression_Error.Syntax_Error
|
||||
@ -293,4 +292,4 @@ spec detailed setup =
|
||||
expression_test "starts_with([C])" Expression_Error.Argument_Mismatch
|
||||
|
||||
specify_test "should fail with Argument_Mismatch if too many arguments" expression_test=error_tester expression_test->
|
||||
expression_test "starts_with([C], 'Hello', 'World')" Expression_Error.Argument_Mismatch
|
||||
expression_test "Not([C], 'Hello')" Expression_Error.Argument_Mismatch
|
||||
|
@ -1,8 +1,10 @@
|
||||
from Standard.Base import all
|
||||
import Standard.Base.Error.Common.Index_Out_Of_Bounds
|
||||
import Standard.Base.Error.Common.Type_Error
|
||||
import Standard.Base.Error.Illegal_Argument.Illegal_Argument
|
||||
|
||||
import Standard.Table.Data.Value_Type.Value_Type
|
||||
import Standard.Table.Data.Expression.Expression_Error
|
||||
from Standard.Table.Errors import all
|
||||
|
||||
from Standard.Database.Errors import SQL_Error
|
||||
@ -78,26 +80,45 @@ spec setup =
|
||||
t.filter "X" (Filter_Condition.Equal to=(t.at "Y")) . at "X" . to_vector . should_equal ["b", "c"]
|
||||
t.filter "X" (Filter_Condition.Between (t.at "Y") "bzzzz") . at "X" . to_vector . should_equal ["abb", "baca", "b"]
|
||||
|
||||
Test.specify "by text search (contains, starts_with, ends_with, like)" <|
|
||||
t = table_builder [["ix", [1, 2, 3, 4, 5]], ["X", ["abb", "baca", "banana", Nothing, "nana"]], ["Y", ["a", "b", "d", "c", "a"]], ["Z", ["aaaaa", "bbbbb", "[ab]", "[ab]aaaa", "[ab]ccc"]]]
|
||||
Test.specify "by text search (contains, starts_with, ends_with, not_contains)" <|
|
||||
t = table_builder [["ix", [1, 2, 3, 4, 5]], ["X", ["abb", "bacb", "banana", Nothing, "nana"]], ["Y", ["a", "B", "d", "c", "a"]], ["Z", ["aaaaa", "bbbbb", "[ab]", "[ab]aaaa", "[ab]ccc"]]]
|
||||
|
||||
t.filter "X" (Filter_Condition.Starts_With "ba") on_problems=Report_Error . at "X" . to_vector . should_equal ["baca", "banana"]
|
||||
t.filter "X" (Filter_Condition.Starts_With "ba") on_problems=Report_Error . at "X" . to_vector . should_equal ["bacb", "banana"]
|
||||
t.filter "X" (Filter_Condition.Starts_With "BA" Case_Sensitivity.Sensitive) on_problems=Report_Error . at "X" . to_vector . should_equal []
|
||||
t.filter "X" (Filter_Condition.Starts_With "BA" Case_Sensitivity.Insensitive) on_problems=Report_Error . at "X" . to_vector . should_equal ["bacb", "banana"]
|
||||
t.filter "X" (Filter_Condition.Ends_With "na") on_problems=Report_Error . at "X" . to_vector . should_equal ["banana", "nana"]
|
||||
t.filter "X" (Filter_Condition.Contains "ac") on_problems=Report_Error . at "X" . to_vector . should_equal ["baca"]
|
||||
t.filter "X" (Filter_Condition.Ends_With "NA" Case_Sensitivity.Sensitive) on_problems=Report_Error . at "X" . to_vector . should_equal []
|
||||
t.filter "X" (Filter_Condition.Ends_With "NA" Case_Sensitivity.Insensitive) on_problems=Report_Error . at "X" . to_vector . should_equal ["banana", "nana"]
|
||||
t.filter "X" (Filter_Condition.Contains "ac") on_problems=Report_Error . at "X" . to_vector . should_equal ["bacb"]
|
||||
t.filter "X" (Filter_Condition.Contains "AC" Case_Sensitivity.Sensitive) on_problems=Report_Error . at "X" . to_vector . should_equal []
|
||||
t.filter "X" (Filter_Condition.Contains "AC" Case_Sensitivity.Insensitive) on_problems=Report_Error . at "X" . to_vector . should_equal ["bacb"]
|
||||
t.filter "X" (Filter_Condition.Not_Contains "ac") on_problems=Report_Error . at "X" . to_vector . should_equal ["abb", "banana", "nana"]
|
||||
t.filter "X" (Filter_Condition.Not_Contains "AC" Case_Sensitivity.Sensitive) on_problems=Report_Error . at "X" . to_vector . should_equal ["abb", "bacb", "banana", "nana"]
|
||||
t.filter "X" (Filter_Condition.Not_Contains "AC" Case_Sensitivity.Insensitive) on_problems=Report_Error . at "X" . to_vector . should_equal ["abb", "banana", "nana"]
|
||||
|
||||
t.filter "X" (Filter_Condition.Starts_With (t.at "Y")) on_problems=Report_Error . at "X" . to_vector . should_equal ["abb", "baca"]
|
||||
t.filter "X" (Filter_Condition.Starts_With (t.at "Y")) on_problems=Report_Error . at "X" . to_vector . should_equal ["abb"]
|
||||
t.filter "X" (Filter_Condition.Starts_With (t.at "Y") Case_Sensitivity.Sensitive) on_problems=Report_Error . at "X" . to_vector . should_equal ["abb"]
|
||||
t.filter "X" (Filter_Condition.Starts_With (t.at "Y") Case_Sensitivity.Insensitive) on_problems=Report_Error . at "X" . to_vector . should_equal ["abb", "bacb"]
|
||||
t.filter "X" (Filter_Condition.Ends_With (t.at "Y")) on_problems=Report_Error . at "X" . to_vector . should_equal ["nana"]
|
||||
t.filter "X" (Filter_Condition.Contains (t.at "Y")) on_problems=Report_Error . at "X" . to_vector . should_equal ["abb", "baca", "nana"]
|
||||
t.filter "X" (Filter_Condition.Not_Contains (t.at "Y")) on_problems=Report_Error . at "X" . to_vector . should_equal ["banana"]
|
||||
t.filter "X" (Filter_Condition.Ends_With (t.at "Y") Case_Sensitivity.Sensitive) on_problems=Report_Error . at "X" . to_vector . should_equal ["nana"]
|
||||
t.filter "X" (Filter_Condition.Ends_With (t.at "Y") Case_Sensitivity.Insensitive) on_problems=Report_Error . at "X" . to_vector . should_equal ["bacb", "nana"]
|
||||
t.filter "X" (Filter_Condition.Contains (t.at "Y")) on_problems=Report_Error . at "X" . to_vector . should_equal ["abb", "nana"]
|
||||
t.filter "X" (Filter_Condition.Contains (t.at "Y") Case_Sensitivity.Sensitive) on_problems=Report_Error . at "X" . to_vector . should_equal ["abb", "nana"]
|
||||
t.filter "X" (Filter_Condition.Contains (t.at "Y") Case_Sensitivity.Insensitive) on_problems=Report_Error . at "X" . to_vector . should_equal ["abb", "bacb", "nana"]
|
||||
t.filter "X" (Filter_Condition.Not_Contains (t.at "Y")) on_problems=Report_Error . at "X" . to_vector . should_equal ["bacb", "banana"]
|
||||
t.filter "X" (Filter_Condition.Not_Contains (t.at "Y") Case_Sensitivity.Sensitive) on_problems=Report_Error . at "X" . to_vector . should_equal ["bacb", "banana"]
|
||||
t.filter "X" (Filter_Condition.Not_Contains (t.at "Y") Case_Sensitivity.Insensitive) on_problems=Report_Error . at "X" . to_vector . should_equal ["banana"]
|
||||
|
||||
Test.specify "by text search (like, not_like)" <|
|
||||
t = table_builder [["ix", [1, 2, 3, 4, 5]], ["X", ["abb", "bacb", "banana", Nothing, "nana"]], ["Y", ["a", "B", "d", "c", "a"]], ["Z", ["aaaaa", "bbbbb", "[ab]", "[ab]aaaa", "[ab]ccc"]]]
|
||||
|
||||
t.filter "X" (Filter_Condition.Like "%an%") on_problems=Report_Error . at "X" . to_vector . should_equal ["banana", "nana"]
|
||||
t.filter "X" (Filter_Condition.Like "_a%") on_problems=Report_Error . at "X" . to_vector . should_equal ["baca", "banana", "nana"]
|
||||
t.filter "X" (Filter_Condition.Like "%b") on_problems=Report_Error . at "X" . to_vector . should_equal ["abb"]
|
||||
t.filter "X" (Filter_Condition.Like "_a%") on_problems=Report_Error . at "X" . to_vector . should_equal ["bacb", "banana", "nana"]
|
||||
t.filter "X" (Filter_Condition.Like "%b") on_problems=Report_Error . at "X" . to_vector . should_equal ["abb", "bacb"]
|
||||
t.filter "X" (Filter_Condition.Like "nana") on_problems=Report_Error . at "X" . to_vector . should_equal ["nana"]
|
||||
t.filter "Z" (Filter_Condition.Like "[ab]_%") on_problems=Report_Error . at "Z" . to_vector . should_equal ["[ab]aaaa", "[ab]ccc"]
|
||||
|
||||
t.filter "X" (Filter_Condition.Not_Like "%b") on_problems=Report_Error . at "X" . to_vector . should_equal ["baca", "banana", "nana"]
|
||||
t.filter "X" (Filter_Condition.Not_Like "%b") on_problems=Report_Error . at "X" . to_vector . should_equal ["banana", "nana"]
|
||||
t.filter "Z" (Filter_Condition.Not_Like "[ab]%") on_problems=Report_Error . at "Z" . to_vector . should_equal ["aaaaa", "bbbbb"]
|
||||
|
||||
Test.specify "text operations should also match newlines" <|
|
||||
@ -259,16 +280,29 @@ spec setup =
|
||||
|
||||
Test.specify "should handle selection errors: unknown column name" <|
|
||||
t = table_builder [["X", [10, 20, 13, 4, 5]]]
|
||||
action = t.filter "unknown column" on_problems=_
|
||||
tester table =
|
||||
table.at "X" . to_vector . should_equal (t.at "X" . to_vector)
|
||||
problems = [No_Such_Column.Error "unknown column"]
|
||||
Problems.test_problem_handling action problems tester
|
||||
t.filter "unknown column" . should_fail_with No_Such_Column.Error
|
||||
t.filter "unknown column" . catch . should_equal (No_Such_Column.Error "unknown column")
|
||||
|
||||
Test.specify "should handle selection errors: out of bounds index" <|
|
||||
t = table_builder [["X", [10, 20, 13, 4, 5]]]
|
||||
action = t.filter 4 on_problems=_
|
||||
tester table =
|
||||
table.at "X" . to_vector . should_equal (t.at "X" . to_vector)
|
||||
problems = [Index_Out_Of_Bounds.Error 4 1]
|
||||
Problems.test_problem_handling action problems tester
|
||||
t.filter 4 . should_fail_with Index_Out_Of_Bounds.Error
|
||||
t.filter 4 . catch . should_equal (Index_Out_Of_Bounds.Error 4 1)
|
||||
|
||||
Test.group prefix+"Table.filter_by_expression" <|
|
||||
Test.specify "by a boolean column" <|
|
||||
t = table_builder [["ix", [1, 2, 3, 4, 5]], ["b", [True, False, Nothing, True, True]]]
|
||||
t.filter_by_expression "[b]" on_problems=Report_Error . at "ix" . to_vector . should_equal [1, 4, 5]
|
||||
t.filter_by_expression "![b]" on_problems=Report_Error . at "ix" . to_vector . should_equal [2]
|
||||
|
||||
Test.specify "by an integer comparison" <|
|
||||
t = table_builder [["ix", [1, 2, 3, 4, 5]], ["b", [True, False, Nothing, True, True]]]
|
||||
t.filter_by_expression "[ix]==3" on_problems=Report_Error . at "ix" . to_vector . should_equal [3]
|
||||
t.filter_by_expression "[ix]>2" on_problems=Report_Error . at "ix" . to_vector . should_equal [3, 4, 5]
|
||||
|
||||
Test.specify "fail gracefully" <|
|
||||
t = table_builder [["ix", [1, 2, 3, 4, 5]], ["b", [True, False, Nothing, True, True]]]
|
||||
t.filter_by_expression "[ix" . should_fail_with Expression_Error.Syntax_Error
|
||||
t.filter_by_expression "NOT(b,False)" . should_fail_with Expression_Error.Argument_Mismatch
|
||||
t.filter_by_expression "Starts_With(b)" . should_fail_with Expression_Error.Argument_Mismatch
|
||||
t.filter_by_expression "[missing]" . should_fail_with No_Such_Column.Error
|
||||
t.filter_by_expression "[ix]" . should_fail_with Illegal_Argument.Error
|
||||
|
@ -587,8 +587,8 @@ spec setup =
|
||||
Test.specify "should work correctly when the join is performed on a transformed table" <|
|
||||
t1 = table_builder [["X", [1, 2, 3]]]
|
||||
|
||||
t1_2 = t1.set "A" "10*[X]+1"
|
||||
t1_3 = t1.set "B" "[X]+20"
|
||||
t1_2 = t1.set "10*[X]+1" new_name="A"
|
||||
t1_3 = t1.set "[X]+20" new_name="B"
|
||||
|
||||
t2 = t1_2.join t1_3 on=(Join_Condition.Equals "A" "B")
|
||||
t2.at "A" . to_vector . should_equal [21]
|
||||
@ -599,7 +599,7 @@ spec setup =
|
||||
t4 = table_builder [["X", [1, 2, 3]], ["Y", [10, 20, 30]]]
|
||||
t5 = table_builder [["X", [5, 7, 1]], ["Z", [100, 200, 300]]]
|
||||
|
||||
t4_2 = t4.set "C" "2*[X]+1"
|
||||
t4_2 = t4.set "2*[X]+1" new_name="C"
|
||||
t6 = t4_2.join t5 on=(Join_Condition.Equals "C" "X") join_kind=Join_Kind.Inner
|
||||
expect_column_names ["X", "Y", "C", "Right_X", "Z"] t6
|
||||
r2 = materialize t6 . order_by ["Y"] . rows . map .to_vector
|
||||
@ -623,7 +623,7 @@ spec setup =
|
||||
t4 = table_builder [["X", [1, 2, 3]], ["Y", [10, 20, 30]]]
|
||||
t5 = table_builder [["X", [5, 7, 1]], ["Z", [100, 200, 300]]]
|
||||
|
||||
t4_2 = t4.set "C" "2*[X]+1"
|
||||
t4_2 = t4.set "2*[X]+1" new_name="C"
|
||||
t6 = t4_2.join t5 on=(Join_Condition.Equals "C" "X") join_kind=Join_Kind.Full
|
||||
expect_column_names ["X", "Y", "C", "Right_X", "Z"] t6
|
||||
r2 = materialize t6 . order_by ["Y"] . rows . map .to_vector
|
||||
@ -634,7 +634,7 @@ spec setup =
|
||||
r2.at 3 . should_equal [3, 30, 7, 7, 200]
|
||||
|
||||
t4_3 = table_builder [["X", [Nothing, 2, 3]], ["Y", [10, 20, 30]]]
|
||||
t4_4 = t4_3.set "C" (t4_3.at "X" . fill_missing 7)
|
||||
t4_4 = t4_3.set (t4_3.at "X" . fill_missing 7) new_name="C"
|
||||
t7 = t4_4.join t5 on=(Join_Condition.Equals "C" "X") join_kind=Join_Kind.Full
|
||||
within_table t7 <|
|
||||
expect_column_names ["X", "Y", "C", "Right_X", "Z"] t7
|
||||
|
@ -51,7 +51,7 @@ spec =
|
||||
foo = t1.at "A" . rename "FOO"
|
||||
foo.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "FOO" FROM "T1" AS "T1"', []]
|
||||
|
||||
t3 = t2.set "bar" foo
|
||||
t3 = t2.set foo new_name="bar"
|
||||
t3.to_sql.prepare . should_equal ['SELECT "T1"."C" AS "C", "T1"."B" AS "B", "T1"."A" AS "bar" FROM "T1" AS "T1"', []]
|
||||
|
||||
Test.specify "should fail if at is called for a non-existent column" <|
|
||||
@ -81,8 +81,8 @@ spec =
|
||||
ends = b.ends_with "suf"
|
||||
starts = b.starts_with "pref"
|
||||
contains = b.contains "inf"
|
||||
ends.to_sql.prepare . should_equal ['SELECT ("T1"."B" LIKE (\'%\' || ?)) AS "B" FROM "T1" AS "T1"', [["suf", str]]]
|
||||
starts.to_sql.prepare . should_equal ['SELECT ("T1"."B" LIKE (? || \'%\')) AS "B" FROM "T1" AS "T1"', [["pref", str]]]
|
||||
ends.to_sql.prepare . should_equal ['SELECT ("T1"."B" GLOB (\'*\' || ?)) AS "B" FROM "T1" AS "T1"', [["suf", str]]]
|
||||
starts.to_sql.prepare . should_equal ['SELECT ("T1"."B" GLOB (? || \'*\')) AS "B" FROM "T1" AS "T1"', [["pref", str]]]
|
||||
contains.to_sql.prepare . should_equal ['SELECT instr("T1"."B", ?) > 0 AS "B" FROM "T1" AS "T1"', [["inf", str]]]
|
||||
|
||||
Test.group "[Codegen] Masking Tables and Columns" <|
|
||||
|
@ -298,8 +298,8 @@ connection_setup_spec = Test.group "[PostgreSQL] Connection setup" <|
|
||||
|
||||
c2.host . should_equal "localhost"
|
||||
c2.port . should_equal 5432
|
||||
c2.database . should_equal ""
|
||||
c2.jdbc_url . should_equal "jdbc:postgresql://localhost:5432"
|
||||
c2.database . should_equal "postgres"
|
||||
c2.jdbc_url . should_equal "jdbc:postgresql://localhost:5432/postgres"
|
||||
|
||||
c3.host . should_equal "192.168.0.1"
|
||||
c3.port . should_equal 1000
|
||||
@ -312,18 +312,18 @@ connection_setup_spec = Test.group "[PostgreSQL] Connection setup" <|
|
||||
Postgres
|
||||
c4.host . should_equal "localhost"
|
||||
c4.port . should_equal 5432
|
||||
c4.database . should_equal ""
|
||||
c4.jdbc_url . should_equal "jdbc:postgresql://localhost:5432"
|
||||
c4.database . should_equal "postgres"
|
||||
c4.jdbc_url . should_equal "jdbc:postgresql://localhost:5432/postgres"
|
||||
|
||||
add_ssl props = props+[Pair.new 'sslmode' 'prefer']
|
||||
Test.specify "should use the given credentials" <|
|
||||
c = Postgres credentials=(Credentials.Username_And_Password "myuser" "mypass")
|
||||
c.jdbc_url . should_equal "jdbc:postgresql://localhost:5432"
|
||||
c.jdbc_url . should_equal "jdbc:postgresql://localhost:5432/postgres"
|
||||
c.jdbc_properties . should_equal <| add_ssl [Pair.new "user" "myuser", Pair.new "password" "mypass"]
|
||||
|
||||
Test.specify "should fallback to environment variables and fill-out missing information based on the PGPASS file (if available)" <|
|
||||
c1 = Postgres
|
||||
c1.jdbc_url . should_equal "jdbc:postgresql://localhost:5432"
|
||||
c1.jdbc_url . should_equal "jdbc:postgresql://localhost:5432/postgres"
|
||||
|
||||
c1.jdbc_properties . should_equal <| add_ssl []
|
||||
Test_Environment.unsafe_with_environment_override "PGPASSWORD" "somepassword" <|
|
||||
|
@ -1,281 +1,337 @@
|
||||
from Standard.Base import all
|
||||
|
||||
from Standard.Table import Table, Data_Formatter
|
||||
from Standard.Table import Table, Data_Formatter, Column
|
||||
from Standard.Table.Data.Column_Type_Selection import Column_Type_Selection, Auto
|
||||
from Standard.Table.Errors import Invalid_Format, Leading_Zeros, Missing_Input_Columns, Column_Indexes_Out_Of_Range, Duplicate_Type_Selector
|
||||
|
||||
from Standard.Test import Test, Test_Suite, Problems
|
||||
import Standard.Test.Extensions
|
||||
|
||||
spec = Test.group "Table.parse_values" <|
|
||||
Test.specify "should correctly parse integers" <|
|
||||
t1 = Table.new [["ints", ["0", "+0", "-0", "+1", "-1", "1", "000", "0010", "12345", Nothing]]]
|
||||
t2 = t1.parse_values column_types=[Column_Type_Selection.Value 0 Integer]
|
||||
t2.at "ints" . to_vector . should_equal [0, 0, 0, 1, -1, 1, Nothing, Nothing, 12345, Nothing]
|
||||
spec =
|
||||
Test.group "Table.parse_values" <|
|
||||
Test.specify "should correctly parse integers" <|
|
||||
t1 = Table.new [["ints", ["0", "+0", "-0", "+1", "-1", "1", "000", "0010", "12345", Nothing]]]
|
||||
t2 = t1.parse_values column_types=[Column_Type_Selection.Value 0 Integer]
|
||||
t2.at "ints" . to_vector . should_equal [0, 0, 0, 1, -1, 1, Nothing, Nothing, 12345, Nothing]
|
||||
|
||||
Test.specify "should correctly parse decimals" <|
|
||||
t1 = Table.new [["ints", ["0", "+0", "-0", "+1", "-1", "1", "12345", Nothing]]]
|
||||
t2 = t1.parse_values column_types=[Column_Type_Selection.Value 0 Decimal]
|
||||
t2.at "ints" . to_vector . should_equal [0, 0, 0, 1, -1, 1, 12345, Nothing]
|
||||
t2.at "ints" . to_vector . map .to_text . should_equal ["0.0", "0.0", "-0.0", "1.0", "-1.0", "1.0", "12345.0", "Nothing"]
|
||||
Test.specify "should correctly parse decimals" <|
|
||||
t1 = Table.new [["ints", ["0", "+0", "-0", "+1", "-1", "1", "12345", Nothing]]]
|
||||
t2 = t1.parse_values column_types=[Column_Type_Selection.Value 0 Decimal]
|
||||
t2.at "ints" . to_vector . should_equal [0, 0, 0, 1, -1, 1, 12345, Nothing]
|
||||
t2.at "ints" . to_vector . map .to_text . should_equal ["0.0", "0.0", "-0.0", "1.0", "-1.0", "1.0", "12345.0", "Nothing"]
|
||||
|
||||
t3 = Table.new [["floats", ["0.0", "+0.0", "-0.0", "+1.0", "-1.0", "1.0", "0.0000", "10.", "12345."]]]
|
||||
t4 = t3.parse_values column_types=[Column_Type_Selection.Value 0 Decimal]
|
||||
t4.at "floats" . to_vector . should_equal [0, 0, 0, 1, -1, 1, 0, 10, 12345]
|
||||
t3 = Table.new [["floats", ["0.0", "+0.0", "-0.0", "+1.0", "-1.0", "1.0", "0.0000", "10.", "12345."]]]
|
||||
t4 = t3.parse_values column_types=[Column_Type_Selection.Value 0 Decimal]
|
||||
t4.at "floats" . to_vector . should_equal [0, 0, 0, 1, -1, 1, 0, 10, 12345]
|
||||
|
||||
t5 = Table.new [["floats", [".0", "0.", "1.", ".1", ".123", "-.1", "+.1", "+0.0", "0.1234", Nothing, "11111111.111"]]]
|
||||
t6 = t5.parse_values column_types=[Column_Type_Selection.Value 0 Decimal]
|
||||
t6.at "floats" . to_vector . should_equal [0.0, 0.0, 1.0, 0.1, 0.123, -0.1, 0.1, 0.0, 0.1234, Nothing, 11111111.111]
|
||||
t5 = Table.new [["floats", [".0", "0.", "1.", ".1", ".123", "-.1", "+.1", "+0.0", "0.1234", Nothing, "11111111.111"]]]
|
||||
t6 = t5.parse_values column_types=[Column_Type_Selection.Value 0 Decimal]
|
||||
t6.at "floats" . to_vector . should_equal [0.0, 0.0, 1.0, 0.1, 0.123, -0.1, 0.1, 0.0, 0.1234, Nothing, 11111111.111]
|
||||
|
||||
Test.specify "should warn on leading zeros in numbers, if asked" <|
|
||||
t1 = Table.new [["ints", ["0", "+00", "-00", "+01", "-01", "01", "000", "0010", "12345", Nothing]]]
|
||||
t2 = Table.new [["floats", ["0.0000", ".0", "00.", "01.0", "-0010.0000", "1.0000"]]]
|
||||
Test.specify "should warn on leading zeros in numbers, if asked" <|
|
||||
t1 = Table.new [["ints", ["0", "+00", "-00", "+01", "-01", "01", "000", "0010", "12345", Nothing]]]
|
||||
t2 = Table.new [["floats", ["0.0000", ".0", "00.", "01.0", "-0010.0000", "1.0000"]]]
|
||||
|
||||
t1_parsed = [0, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, 12345, Nothing]
|
||||
t1_zeros = ["+00", "-00", "+01", "-01", "01", "000", "0010"]
|
||||
t3 = t1.parse_values column_types=[Column_Type_Selection.Value 0 Integer]
|
||||
t3.at "ints" . to_vector . should_equal t1_parsed
|
||||
Problems.get_attached_warnings t3 . should_equal [Leading_Zeros.Error "ints" Integer t1_zeros]
|
||||
t1_parsed = [0, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, 12345, Nothing]
|
||||
t1_zeros = ["+00", "-00", "+01", "-01", "01", "000", "0010"]
|
||||
t3 = t1.parse_values column_types=[Column_Type_Selection.Value 0 Integer]
|
||||
t3.at "ints" . to_vector . should_equal t1_parsed
|
||||
Problems.get_attached_warnings t3 . should_equal [Leading_Zeros.Error "ints" Integer t1_zeros]
|
||||
|
||||
t4 = t1.parse_values column_types=[Column_Type_Selection.Value 0 Decimal]
|
||||
t4.at "ints" . to_vector . should_equal t1_parsed
|
||||
Problems.get_attached_warnings t4 . should_equal [Leading_Zeros.Error "ints" Decimal t1_zeros]
|
||||
t4 = t1.parse_values column_types=[Column_Type_Selection.Value 0 Decimal]
|
||||
t4.at "ints" . to_vector . should_equal t1_parsed
|
||||
Problems.get_attached_warnings t4 . should_equal [Leading_Zeros.Error "ints" Decimal t1_zeros]
|
||||
|
||||
t5 = t2.parse_values column_types=[Column_Type_Selection.Value 0 Decimal]
|
||||
t5.at "floats" . to_vector . should_equal [0.0, 0.0, Nothing, Nothing, Nothing, 1.0]
|
||||
Problems.get_attached_warnings t5 . should_equal [Leading_Zeros.Error "floats" Decimal ["00.", "01.0", '-0010.0000']]
|
||||
t5 = t2.parse_values column_types=[Column_Type_Selection.Value 0 Decimal]
|
||||
t5.at "floats" . to_vector . should_equal [0.0, 0.0, Nothing, Nothing, Nothing, 1.0]
|
||||
Problems.get_attached_warnings t5 . should_equal [Leading_Zeros.Error "floats" Decimal ["00.", "01.0", '-0010.0000']]
|
||||
|
||||
opts = Data_Formatter.Value allow_leading_zeros=True
|
||||
t1_parsed_zeros = [0, 0, 0, 1, -1, 1, 0, 10, 12345, Nothing]
|
||||
t6 = t1.parse_values value_formatter=opts column_types=[Column_Type_Selection.Value 0 Integer]
|
||||
t6.at "ints" . to_vector . should_equal t1_parsed_zeros
|
||||
Warning.get_all t6 . should_equal []
|
||||
opts = Data_Formatter.Value allow_leading_zeros=True
|
||||
t1_parsed_zeros = [0, 0, 0, 1, -1, 1, 0, 10, 12345, Nothing]
|
||||
t6 = t1.parse_values value_formatter=opts column_types=[Column_Type_Selection.Value 0 Integer]
|
||||
t6.at "ints" . to_vector . should_equal t1_parsed_zeros
|
||||
Warning.get_all t6 . should_equal []
|
||||
|
||||
t7 = t1.parse_values value_formatter=opts column_types=[Column_Type_Selection.Value 0 Decimal]
|
||||
t7.at "ints" . to_vector . should_equal t1_parsed_zeros
|
||||
Warning.get_all t7 . should_equal []
|
||||
t8 = t2.parse_values value_formatter=opts column_types=[Column_Type_Selection.Value 0 Decimal]
|
||||
t8.at "floats" . to_vector . should_equal [0.0, 0.0, 0.0, 1.0, -10.0, 1.0]
|
||||
Warning.get_all t8 . should_equal []
|
||||
t7 = t1.parse_values value_formatter=opts column_types=[Column_Type_Selection.Value 0 Decimal]
|
||||
t7.at "ints" . to_vector . should_equal t1_parsed_zeros
|
||||
Warning.get_all t7 . should_equal []
|
||||
t8 = t2.parse_values value_formatter=opts column_types=[Column_Type_Selection.Value 0 Decimal]
|
||||
t8.at "floats" . to_vector . should_equal [0.0, 0.0, 0.0, 1.0, -10.0, 1.0]
|
||||
Warning.get_all t8 . should_equal []
|
||||
|
||||
Test.specify "should correctly parse booleans" <|
|
||||
t1 = Table.new [["bools", ["true", "false", "True", "TRUE", "FALSE", Nothing, "False"]]]
|
||||
t2 = t1.parse_values column_types=[Column_Type_Selection.Value 0 Boolean]
|
||||
t2.at "bools" . to_vector . should_equal [True, False, True, True, False, Nothing, False]
|
||||
Test.specify "should correctly parse booleans" <|
|
||||
t1 = Table.new [["bools", ["true", "false", "True", "TRUE", "FALSE", Nothing, "False"]]]
|
||||
t2 = t1.parse_values column_types=[Column_Type_Selection.Value 0 Boolean]
|
||||
t2.at "bools" . to_vector . should_equal [True, False, True, True, False, Nothing, False]
|
||||
|
||||
Test.specify "should correctly parse date and time" <|
|
||||
t1 = Table.new [["dates", ["2022-05-07", "2000-01-01", "2010-12-31"]]]
|
||||
t2 = t1.parse_values column_types=[Column_Type_Selection.Value 0 Date]
|
||||
t2.at "dates" . to_vector . should_equal [Date.new 2022 5 7, Date.new 2000 1 1, Date.new 2010 12 31]
|
||||
Test.specify "should correctly parse date and time" <|
|
||||
t1 = Table.new [["dates", ["2022-05-07", "2000-01-01", "2010-12-31"]]]
|
||||
t2 = t1.parse_values column_types=[Column_Type_Selection.Value 0 Date]
|
||||
t2.at "dates" . to_vector . should_equal [Date.new 2022 5 7, Date.new 2000 1 1, Date.new 2010 12 31]
|
||||
|
||||
t3 = Table.new [["datetimes", ["2022-05-07 23:59:59", "2000-01-01 00:00:00", "2010-12-31 12:34:56"]]]
|
||||
t4 = t3.parse_values column_types=[Column_Type_Selection.Value 0 Date_Time]
|
||||
t4.at "datetimes" . to_vector . should_equal [Date_Time.new 2022 5 7 23 59 59, Date_Time.new 2000 1 1, Date_Time.new 2010 12 31 12 34 56]
|
||||
t3 = Table.new [["datetimes", ["2022-05-07 23:59:59", "2000-01-01 00:00:00", "2010-12-31 12:34:56"]]]
|
||||
t4 = t3.parse_values column_types=[Column_Type_Selection.Value 0 Date_Time]
|
||||
t4.at "datetimes" . to_vector . should_equal [Date_Time.new 2022 5 7 23 59 59, Date_Time.new 2000 1 1, Date_Time.new 2010 12 31 12 34 56]
|
||||
|
||||
t5 = Table.new [["times", ["23:59:59", "00:00:00", "12:34:56"]]]
|
||||
t6 = t5.parse_values column_types=[Column_Type_Selection.Value 0 Time_Of_Day]
|
||||
t6.at "times" . to_vector . should_equal [Time_Of_Day.new 23 59 59, Time_Of_Day.new, Time_Of_Day.new 12 34 56]
|
||||
t5 = Table.new [["times", ["23:59:59", "00:00:00", "12:34:56"]]]
|
||||
t6 = t5.parse_values column_types=[Column_Type_Selection.Value 0 Time_Of_Day]
|
||||
t6.at "times" . to_vector . should_equal [Time_Of_Day.new 23 59 59, Time_Of_Day.new, Time_Of_Day.new 12 34 56]
|
||||
|
||||
Test.specify "should parse date and time in various formats" <|
|
||||
opts = Data_Formatter.Value date_formats=["d.M.y", "d MMM y[ G]", "E, d MMM y"] datetime_formats=["yyyy-MM-dd'T'HH:mm:ss", "dd/MM/yyyy HH:mm"] time_formats=["H:mm:ss.n", "h:mma"]
|
||||
Test.specify "should parse date and time in various formats" <|
|
||||
opts = Data_Formatter.Value date_formats=["d.M.y", "d MMM y[ G]", "E, d MMM y"] datetime_formats=["yyyy-MM-dd'T'HH:mm:ss", "dd/MM/yyyy HH:mm"] time_formats=["H:mm:ss.n", "h:mma"]
|
||||
|
||||
t1 = Table.new [["dates", ["1.2.476", "10 Jan 1900 AD", "Tue, 3 Jun 2008"]]]
|
||||
t2 = t1.parse_values opts column_types=[Column_Type_Selection.Value 0 Date]
|
||||
t2.at "dates" . to_vector . should_equal [Date.new 476 2 1, Date.new 1900 1 10, Date.new 2008 6 3]
|
||||
t1 = Table.new [["dates", ["1.2.476", "10 Jan 1900 AD", "Tue, 3 Jun 2008"]]]
|
||||
t2 = t1.parse_values opts column_types=[Column_Type_Selection.Value 0 Date]
|
||||
t2.at "dates" . to_vector . should_equal [Date.new 476 2 1, Date.new 1900 1 10, Date.new 2008 6 3]
|
||||
|
||||
t3 = Table.new [["datetimes", ["2011-12-03T10:15:30", "31/12/2012 22:33"]]]
|
||||
t4 = t3.parse_values opts column_types=[Column_Type_Selection.Value 0 Date_Time]
|
||||
t4.at "datetimes" . to_vector . should_equal [Date_Time.new 2011 12 3 10 15 30, Date_Time.new 2012 12 31 22 33]
|
||||
t3 = Table.new [["datetimes", ["2011-12-03T10:15:30", "31/12/2012 22:33"]]]
|
||||
t4 = t3.parse_values opts column_types=[Column_Type_Selection.Value 0 Date_Time]
|
||||
t4.at "datetimes" . to_vector . should_equal [Date_Time.new 2011 12 3 10 15 30, Date_Time.new 2012 12 31 22 33]
|
||||
|
||||
t5 = Table.new [["times", ["1:02:03.987654321", "1:30PM"]]]
|
||||
t6 = t5.parse_values opts column_types=[Column_Type_Selection.Value 0 Time_Of_Day]
|
||||
t6.at "times" . to_vector . should_equal [Time_Of_Day.new 1 2 3 nanosecond=987654321, Time_Of_Day.new 13 30 0 0]
|
||||
t5 = Table.new [["times", ["1:02:03.987654321", "1:30PM"]]]
|
||||
t6 = t5.parse_values opts column_types=[Column_Type_Selection.Value 0 Time_Of_Day]
|
||||
t6.at "times" . to_vector . should_equal [Time_Of_Day.new 1 2 3 nanosecond=987654321, Time_Of_Day.new 13 30 0 0]
|
||||
|
||||
Test.specify "should warn when cells do not fit the expected format" <|
|
||||
ints = ["ints", ["0", "1", "1.0", "foobar", "", "--1", "+-1", "10", "-+1"]]
|
||||
# Currently scientific notation is not supported and we document that in this test, in the future the situation may change and the test may need to be flipped.
|
||||
floats = ["floats", ["0", "2.0", "1e6", "foobar", "", "--1", "+-1", "100.", "-+1"]]
|
||||
bools = ["bools", ["True", "false", "fAlSE", "foobar", "", "0", "1", "true", "truefalse"]]
|
||||
time_content = ["2001-01-01", "2001-01-01 12:34:56", "10:00:10", "Tuesday", "foobar", "", "10:99:99", "1/2/2003", "2001-30-10"]
|
||||
t = Table.new [ints, floats, bools, ["dates", time_content], ["datetimes", time_content], ["times", time_content]]
|
||||
Test.specify "should warn when cells do not fit the expected format" <|
|
||||
ints = ["ints", ["0", "1", "1.0", "foobar", "", "--1", "+-1", "10", "-+1"]]
|
||||
# Currently scientific notation is not supported and we document that in this test, in the future the situation may change and the test may need to be flipped.
|
||||
floats = ["floats", ["0", "2.0", "1e6", "foobar", "", "--1", "+-1", "100.", "-+1"]]
|
||||
bools = ["bools", ["True", "false", "fAlSE", "foobar", "", "0", "1", "true", "truefalse"]]
|
||||
time_content = ["2001-01-01", "2001-01-01 12:34:56", "10:00:10", "Tuesday", "foobar", "", "10:99:99", "1/2/2003", "2001-30-10"]
|
||||
t = Table.new [ints, floats, bools, ["dates", time_content], ["datetimes", time_content], ["times", time_content]]
|
||||
|
||||
action pb =
|
||||
t.parse_values column_types=[Column_Type_Selection.Value 0 Integer, Column_Type_Selection.Value "floats" Decimal, Column_Type_Selection.Value "bools" Boolean, Column_Type_Selection.Value "dates" Date, Column_Type_Selection.Value "datetimes" Date_Time, Column_Type_Selection.Value "times" Time_Of_Day] on_problems=pb
|
||||
tester t =
|
||||
t.at "ints" . to_vector . should_equal [0, 1, Nothing, Nothing, Nothing, Nothing, Nothing, 10, Nothing]
|
||||
t.at "floats" . to_vector . should_equal [0, 2, Nothing, Nothing, Nothing, Nothing, Nothing, 100, Nothing]
|
||||
t.at "bools" . to_vector . should_equal [True, False, Nothing, Nothing, Nothing, Nothing, Nothing, True, Nothing]
|
||||
t.at "dates" . to_vector . should_equal [Date.new 2001 1 1, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing]
|
||||
t.at "datetimes" . to_vector . should_equal [Nothing, Date_Time.new 2001 1 1 12 34 56, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing]
|
||||
t.at "times" . to_vector . should_equal [Nothing, Nothing, Time_Of_Day.new 10 0 10, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing]
|
||||
problems =
|
||||
ints = Invalid_Format.Error "ints" Integer ["1.0", "foobar", "", "--1", "+-1", "-+1"]
|
||||
floats = Invalid_Format.Error "floats" Decimal ["1e6", "foobar", "", "--1", "+-1", "-+1"]
|
||||
bools = Invalid_Format.Error "bools" Boolean ["fAlSE", "foobar", "", "0", "1", "truefalse"]
|
||||
dates = Invalid_Format.Error "dates" Date ["2001-01-01 12:34:56", "10:00:10", "Tuesday", "foobar", "", "10:99:99", "1/2/2003", "2001-30-10"]
|
||||
datetimes = Invalid_Format.Error "datetimes" Date_Time ["2001-01-01", "10:00:10", "Tuesday", "foobar", "", "10:99:99", "1/2/2003", "2001-30-10"]
|
||||
times = Invalid_Format.Error "times" Time_Of_Day ["2001-01-01", "2001-01-01 12:34:56", "Tuesday", "foobar", "", "10:99:99", "1/2/2003", "2001-30-10"]
|
||||
[ints, floats, bools, dates, datetimes, times]
|
||||
Problems.test_problem_handling action problems tester
|
||||
action pb =
|
||||
t.parse_values column_types=[Column_Type_Selection.Value 0 Integer, Column_Type_Selection.Value "floats" Decimal, Column_Type_Selection.Value "bools" Boolean, Column_Type_Selection.Value "dates" Date, Column_Type_Selection.Value "datetimes" Date_Time, Column_Type_Selection.Value "times" Time_Of_Day] on_problems=pb
|
||||
tester t =
|
||||
t.at "ints" . to_vector . should_equal [0, 1, Nothing, Nothing, Nothing, Nothing, Nothing, 10, Nothing]
|
||||
t.at "floats" . to_vector . should_equal [0, 2, Nothing, Nothing, Nothing, Nothing, Nothing, 100, Nothing]
|
||||
t.at "bools" . to_vector . should_equal [True, False, Nothing, Nothing, Nothing, Nothing, Nothing, True, Nothing]
|
||||
t.at "dates" . to_vector . should_equal [Date.new 2001 1 1, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing]
|
||||
t.at "datetimes" . to_vector . should_equal [Nothing, Date_Time.new 2001 1 1 12 34 56, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing]
|
||||
t.at "times" . to_vector . should_equal [Nothing, Nothing, Time_Of_Day.new 10 0 10, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing]
|
||||
problems =
|
||||
ints = Invalid_Format.Error "ints" Integer ["1.0", "foobar", "", "--1", "+-1", "-+1"]
|
||||
floats = Invalid_Format.Error "floats" Decimal ["1e6", "foobar", "", "--1", "+-1", "-+1"]
|
||||
bools = Invalid_Format.Error "bools" Boolean ["fAlSE", "foobar", "", "0", "1", "truefalse"]
|
||||
dates = Invalid_Format.Error "dates" Date ["2001-01-01 12:34:56", "10:00:10", "Tuesday", "foobar", "", "10:99:99", "1/2/2003", "2001-30-10"]
|
||||
datetimes = Invalid_Format.Error "datetimes" Date_Time ["2001-01-01", "10:00:10", "Tuesday", "foobar", "", "10:99:99", "1/2/2003", "2001-30-10"]
|
||||
times = Invalid_Format.Error "times" Time_Of_Day ["2001-01-01", "2001-01-01 12:34:56", "Tuesday", "foobar", "", "10:99:99", "1/2/2003", "2001-30-10"]
|
||||
[ints, floats, bools, dates, datetimes, times]
|
||||
Problems.test_problem_handling action problems tester
|
||||
|
||||
Test.specify "should leave not selected columns unaffected" <|
|
||||
t1 = Table.new [["A", ["1", "2"]], ["B", ["3", "4"]]]
|
||||
t2 = t1.parse_values column_types=[Column_Type_Selection.Value "B" Integer]
|
||||
t2.at "A" . to_vector . should_equal ["1", "2"]
|
||||
t2.at "B" . to_vector . should_equal [3, 4]
|
||||
Test.specify "should leave not selected columns unaffected" <|
|
||||
t1 = Table.new [["A", ["1", "2"]], ["B", ["3", "4"]]]
|
||||
t2 = t1.parse_values column_types=[Column_Type_Selection.Value "B" Integer]
|
||||
t2.at "A" . to_vector . should_equal ["1", "2"]
|
||||
t2.at "B" . to_vector . should_equal [3, 4]
|
||||
|
||||
Test.specify "should report missing columns in Column_Type_Selection" <|
|
||||
t = Table.new [["floats", ["1.0"]], ["ints", ["1"]], ["bools", ["True"]]]
|
||||
action pb =
|
||||
t.parse_values column_types=[Column_Type_Selection.Value -1 Boolean, Column_Type_Selection.Value "floats" Decimal, Column_Type_Selection.Value "bars" Boolean, Column_Type_Selection.Value "foos" Boolean, Column_Type_Selection.Value 100 Boolean, Column_Type_Selection.Value -100 Boolean] on_problems=pb
|
||||
tester t =
|
||||
t.at "floats" . to_vector . should_equal [1.0]
|
||||
t.at "ints" . to_vector . should_equal ["1"]
|
||||
t.at "bools" . to_vector . should_equal [True]
|
||||
problems = [Missing_Input_Columns.Error ["bars", "foos"], Column_Indexes_Out_Of_Range.Error [100, -100]]
|
||||
Problems.test_problem_handling action problems tester
|
||||
Test.specify "should report missing columns in Column_Type_Selection" <|
|
||||
t = Table.new [["floats", ["1.0"]], ["ints", ["1"]], ["bools", ["True"]]]
|
||||
action pb =
|
||||
t.parse_values column_types=[Column_Type_Selection.Value -1 Boolean, Column_Type_Selection.Value "floats" Decimal, Column_Type_Selection.Value "bars" Boolean, Column_Type_Selection.Value "foos" Boolean, Column_Type_Selection.Value 100 Boolean, Column_Type_Selection.Value -100 Boolean] on_problems=pb
|
||||
tester t =
|
||||
t.at "floats" . to_vector . should_equal [1.0]
|
||||
t.at "ints" . to_vector . should_equal ["1"]
|
||||
t.at "bools" . to_vector . should_equal [True]
|
||||
problems = [Missing_Input_Columns.Error ["bars", "foos"], Column_Indexes_Out_Of_Range.Error [100, -100]]
|
||||
Problems.test_problem_handling action problems tester
|
||||
|
||||
Test.specify "should report duplicate columns in Column_Type_Selection" <|
|
||||
t = Table.new [["floats", ["1.0"]], ["ints", ["1"]], ["bools", ["True"]], ["foo", ["42"]]]
|
||||
action pb =
|
||||
t.parse_values column_types=[Column_Type_Selection.Value -2 Boolean, Column_Type_Selection.Value "bools" Boolean, Column_Type_Selection.Value "floats" Decimal, Column_Type_Selection.Value "floats" Integer, Column_Type_Selection.Value "ints" Integer] on_problems=pb
|
||||
tester t =
|
||||
t.at "floats" . to_vector . should_equal ["1.0"]
|
||||
t.at "ints" . to_vector . should_equal [1]
|
||||
t.at "bools" . to_vector . should_equal [True]
|
||||
t.at "foo" . to_vector . should_equal ["42"]
|
||||
problems = [(Duplicate_Type_Selector.Error "floats" ambiguous=True), (Duplicate_Type_Selector.Error "bools" ambiguous=False)]
|
||||
Problems.test_problem_handling action problems tester
|
||||
Test.specify "should report duplicate columns in Column_Type_Selection" <|
|
||||
t = Table.new [["floats", ["1.0"]], ["ints", ["1"]], ["bools", ["True"]], ["foo", ["42"]]]
|
||||
action pb =
|
||||
t.parse_values column_types=[Column_Type_Selection.Value -2 Boolean, Column_Type_Selection.Value "bools" Boolean, Column_Type_Selection.Value "floats" Decimal, Column_Type_Selection.Value "floats" Integer, Column_Type_Selection.Value "ints" Integer] on_problems=pb
|
||||
tester t =
|
||||
t.at "floats" . to_vector . should_equal ["1.0"]
|
||||
t.at "ints" . to_vector . should_equal [1]
|
||||
t.at "bools" . to_vector . should_equal [True]
|
||||
t.at "foo" . to_vector . should_equal ["42"]
|
||||
problems = [(Duplicate_Type_Selector.Error "floats" ambiguous=True), (Duplicate_Type_Selector.Error "bools" ambiguous=False)]
|
||||
Problems.test_problem_handling action problems tester
|
||||
|
||||
Test.specify "should guess the datatype for columns" <|
|
||||
c1 = ["ints", ["1", " +2", "-123", Nothing]]
|
||||
c2 = ["ints0", ["01", "02 ", Nothing, "-1"]]
|
||||
c3 = ["floats", [" 1.0 ", "2.2", Nothing, "-1.0"]]
|
||||
c4 = ["bools", ["true", " False", Nothing, "True"]]
|
||||
c5 = ["floats+ints", ["1", "2.2 ", "-1.0", Nothing]]
|
||||
c6 = ["text", ["foobar", "foo", "", Nothing]]
|
||||
c7 = ["dates", ["2022-10-01", " 2000-01-01", "1999-01-02", Nothing]]
|
||||
c8 = ["datetimes", ["2022-10-01 01:02:03 ", "2000-01-01 01:02:03", "1999-01-02 01:02:03", Nothing]]
|
||||
c9 = ["times", ["01:02:03", " 00:00:00 ", "01:02:03", Nothing]]
|
||||
c10 = ["mixeddates", ["2022-10-01", "2000-01-01 01:02:03", "01:02:03", Nothing]]
|
||||
c11 = ["text+ints", ["1", "2", " foobar", Nothing]]
|
||||
t = Table.new [c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11]
|
||||
t2 = t.parse_values
|
||||
Test.specify "should guess the datatype for columns" <|
|
||||
c1 = ["ints", ["1", " +2", "-123", Nothing]]
|
||||
c2 = ["ints0", ["01", "02 ", Nothing, "-1"]]
|
||||
c3 = ["floats", [" 1.0 ", "2.2", Nothing, "-1.0"]]
|
||||
c4 = ["bools", ["true", " False", Nothing, "True"]]
|
||||
c5 = ["floats+ints", ["1", "2.2 ", "-1.0", Nothing]]
|
||||
c6 = ["text", ["foobar", "foo", "", Nothing]]
|
||||
c7 = ["dates", ["2022-10-01", " 2000-01-01", "1999-01-02", Nothing]]
|
||||
c8 = ["datetimes", ["2022-10-01 01:02:03 ", "2000-01-01 01:02:03", "1999-01-02 01:02:03", Nothing]]
|
||||
c9 = ["times", ["01:02:03", " 00:00:00 ", "01:02:03", Nothing]]
|
||||
c10 = ["mixeddates", ["2022-10-01", "2000-01-01 01:02:03", "01:02:03", Nothing]]
|
||||
c11 = ["text+ints", ["1", "2", " foobar", Nothing]]
|
||||
t = Table.new [c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11]
|
||||
t2 = t.parse_values
|
||||
|
||||
Warning.get_all t2 . should_equal []
|
||||
t2.at "ints" . to_vector . should_equal [1, 2, -123, Nothing]
|
||||
t2.at "ints" . to_vector . first . should_be_an Integer
|
||||
t2.at "ints0" . to_vector . should_equal ["01", "02", Nothing, "-1"]
|
||||
t2.at "floats" . to_vector . should_equal [1.0, 2.2, Nothing, -1.0]
|
||||
t2.at "bools" . to_vector . should_equal [True, False, Nothing, True]
|
||||
t2.at "floats+ints" . to_vector . should_equal [1.0, 2.2, -1.0, Nothing]
|
||||
t2.at "text" . to_vector . should_equal ["foobar", "foo", "", Nothing]
|
||||
t2.at "dates" . to_vector . should_equal [Date.new 2022 10 1, Date.new 2000 1 1, Date.new 1999 1 2, Nothing]
|
||||
t2.at "datetimes" . to_vector . should_equal [Date_Time.new 2022 10 1 1 2 3, Date_Time.new 2000 1 1 1 2 3, Date_Time.new 1999 1 2 1 2 3, Nothing]
|
||||
t2.at "times" . to_vector . should_equal [Time_Of_Day.new 1 2 3, Time_Of_Day.new, Time_Of_Day.new 1 2 3, Nothing]
|
||||
t2.at "mixeddates" . to_vector . should_equal ["2022-10-01", "2000-01-01 01:02:03", "01:02:03", Nothing]
|
||||
t2.at "text+ints" . to_vector . should_equal ["1", "2", "foobar", Nothing]
|
||||
Warning.get_all t2 . should_equal []
|
||||
t2.at "ints" . to_vector . should_equal [1, 2, -123, Nothing]
|
||||
t2.at "ints" . to_vector . first . should_be_an Integer
|
||||
t2.at "ints0" . to_vector . should_equal ["01", "02", Nothing, "-1"]
|
||||
t2.at "floats" . to_vector . should_equal [1.0, 2.2, Nothing, -1.0]
|
||||
t2.at "bools" . to_vector . should_equal [True, False, Nothing, True]
|
||||
t2.at "floats+ints" . to_vector . should_equal [1.0, 2.2, -1.0, Nothing]
|
||||
t2.at "text" . to_vector . should_equal ["foobar", "foo", "", Nothing]
|
||||
t2.at "dates" . to_vector . should_equal [Date.new 2022 10 1, Date.new 2000 1 1, Date.new 1999 1 2, Nothing]
|
||||
t2.at "datetimes" . to_vector . should_equal [Date_Time.new 2022 10 1 1 2 3, Date_Time.new 2000 1 1 1 2 3, Date_Time.new 1999 1 2 1 2 3, Nothing]
|
||||
t2.at "times" . to_vector . should_equal [Time_Of_Day.new 1 2 3, Time_Of_Day.new, Time_Of_Day.new 1 2 3, Nothing]
|
||||
t2.at "mixeddates" . to_vector . should_equal ["2022-10-01", "2000-01-01 01:02:03", "01:02:03", Nothing]
|
||||
t2.at "text+ints" . to_vector . should_equal ["1", "2", "foobar", Nothing]
|
||||
|
||||
t3 = Table.new [["bools", ["1", "0", "True"]], ["ints", ["1", "0", "0"]]] . parse_values (Data_Formatter.Value true_values=["1", "True"] false_values=["0", "False"])
|
||||
t3.at "bools" . to_vector . should_equal [True, False, True]
|
||||
t3.at "ints" . to_vector . should_equal [1, 0, 0]
|
||||
t3 = Table.new [["bools", ["1", "0", "True"]], ["ints", ["1", "0", "0"]]] . parse_values (Data_Formatter.Value true_values=["1", "True"] false_values=["0", "False"])
|
||||
t3.at "bools" . to_vector . should_equal [True, False, True]
|
||||
t3.at "ints" . to_vector . should_equal [1, 0, 0]
|
||||
|
||||
t4 = Table.new [c2] . parse_values (Data_Formatter.Value allow_leading_zeros=True)
|
||||
t4 . at "ints0" . to_vector . should_equal [1, 2, Nothing, -1]
|
||||
t4 = Table.new [c2] . parse_values (Data_Formatter.Value allow_leading_zeros=True)
|
||||
t4 . at "ints0" . to_vector . should_equal [1, 2, Nothing, -1]
|
||||
|
||||
t5 = t.parse_values column_types=[Column_Type_Selection.Value "ints" Decimal, Column_Type_Selection.Value "floats" Auto, Column_Type_Selection.Value "text+ints" Auto]
|
||||
t5.at "ints" . to_vector . should_equal [1.0, 2.0, -123.0, Nothing]
|
||||
# `ints` are requested to be parsed as decimals.
|
||||
t5.at "ints" . to_vector . first . should_be_a Decimal
|
||||
# `floats` are auto-detected as decimals.
|
||||
t5.at "floats" . to_vector . should_equal [1.0, 2.2, Nothing, -1.0]
|
||||
# `text+ints` is attempted to be parsed (hence whitespace is stripped), but it only fits the text type.
|
||||
t5.at "text+ints" . to_vector . should_equal ["1", "2", "foobar", Nothing]
|
||||
# `bools` are not requested to be parsed, so they are kept as-is, with leading whitespace etc.
|
||||
t5.at "bools" . to_vector . should_equal ["true", " False", Nothing, "True"]
|
||||
t5 = t.parse_values column_types=[Column_Type_Selection.Value "ints" Decimal, Column_Type_Selection.Value "floats" Auto, Column_Type_Selection.Value "text+ints" Auto]
|
||||
t5.at "ints" . to_vector . should_equal [1.0, 2.0, -123.0, Nothing]
|
||||
# `ints` are requested to be parsed as decimals.
|
||||
t5.at "ints" . to_vector . first . should_be_a Decimal
|
||||
# `floats` are auto-detected as decimals.
|
||||
t5.at "floats" . to_vector . should_equal [1.0, 2.2, Nothing, -1.0]
|
||||
# `text+ints` is attempted to be parsed (hence whitespace is stripped), but it only fits the text type.
|
||||
t5.at "text+ints" . to_vector . should_equal ["1", "2", "foobar", Nothing]
|
||||
# `bools` are not requested to be parsed, so they are kept as-is, with leading whitespace etc.
|
||||
t5.at "bools" . to_vector . should_equal ["true", " False", Nothing, "True"]
|
||||
|
||||
Test.specify "should allow to specify a thousands separator and a custom decimal point" <|
|
||||
opts = Data_Formatter.Value decimal_point=',' thousand_separator='_'
|
||||
t1 = Table.new [["floats", ["0,0", "+0,0", "-0,0", "+1,5", "-1,2", "1,0", "0,0000", "10_000,", ",0"]]]
|
||||
t2 = t1.parse_values opts column_types=[Column_Type_Selection.Value 0 Decimal]
|
||||
t2.at "floats" . to_vector . should_equal [0.0, 0.0, 0.0, 1.5, -1.2, 1.0, 0.0, 10000.0, 0.0]
|
||||
Test.specify "should allow to specify a thousands separator and a custom decimal point" <|
|
||||
opts = Data_Formatter.Value decimal_point=',' thousand_separator='_'
|
||||
t1 = Table.new [["floats", ["0,0", "+0,0", "-0,0", "+1,5", "-1,2", "1,0", "0,0000", "10_000,", ",0"]]]
|
||||
t2 = t1.parse_values opts column_types=[Column_Type_Selection.Value 0 Decimal]
|
||||
t2.at "floats" . to_vector . should_equal [0.0, 0.0, 0.0, 1.5, -1.2, 1.0, 0.0, 10000.0, 0.0]
|
||||
|
||||
t3 = Table.new [["xs", ["1,2", "1.2", "_0", "0_", "1_0_0"]]]
|
||||
t4 = t3.parse_values opts column_types=[Column_Type_Selection.Value 0 Decimal]
|
||||
t4.at "xs" . to_vector . should_equal [1.2, Nothing, Nothing, Nothing, 100.0]
|
||||
Problems.get_attached_warnings t4 . should_equal [Invalid_Format.Error "xs" Decimal ["1.2", "_0", "0_"]]
|
||||
t5 = t3.parse_values opts column_types=[Column_Type_Selection.Value 0 Integer]
|
||||
t5.at "xs" . to_vector . should_equal [Nothing, Nothing, Nothing, Nothing, 100.0]
|
||||
Problems.get_attached_warnings t5 . should_equal [Invalid_Format.Error "xs" Integer ["1,2", "1.2", "_0", "0_"]]
|
||||
t3 = Table.new [["xs", ["1,2", "1.2", "_0", "0_", "1_0_0"]]]
|
||||
t4 = t3.parse_values opts column_types=[Column_Type_Selection.Value 0 Decimal]
|
||||
t4.at "xs" . to_vector . should_equal [1.2, Nothing, Nothing, Nothing, 100.0]
|
||||
Problems.get_attached_warnings t4 . should_equal [Invalid_Format.Error "xs" Decimal ["1.2", "_0", "0_"]]
|
||||
t5 = t3.parse_values opts column_types=[Column_Type_Selection.Value 0 Integer]
|
||||
t5.at "xs" . to_vector . should_equal [Nothing, Nothing, Nothing, Nothing, 100.0]
|
||||
Problems.get_attached_warnings t5 . should_equal [Invalid_Format.Error "xs" Integer ["1,2", "1.2", "_0", "0_"]]
|
||||
|
||||
Test.specify "should allow to specify custom values for booleans" <|
|
||||
opts_1 = Data_Formatter.Value true_values=["1", "YES"] false_values=["0"]
|
||||
t1 = Table.new [["bools", ["1", "0", "YES", "1", "0"]]]
|
||||
t2 = t1.parse_values opts_1 column_types=[Column_Type_Selection.Value 0 Boolean]
|
||||
t2.at "bools" . to_vector . should_equal [True, False, True, True, False]
|
||||
Test.specify "should allow to specify custom values for booleans" <|
|
||||
opts_1 = Data_Formatter.Value true_values=["1", "YES"] false_values=["0"]
|
||||
t1 = Table.new [["bools", ["1", "0", "YES", "1", "0"]]]
|
||||
t2 = t1.parse_values opts_1 column_types=[Column_Type_Selection.Value 0 Boolean]
|
||||
t2.at "bools" . to_vector . should_equal [True, False, True, True, False]
|
||||
|
||||
t3 = Table.new [["bools", ["1", "NO", "False", "True", "YES", "no", "oui", "0"]]]
|
||||
t4 = t3.parse_values opts_1 column_types=[Column_Type_Selection.Value 0 Boolean]
|
||||
t4.at "bools" . to_vector . should_equal [True, Nothing, Nothing, Nothing, True, Nothing, Nothing, False]
|
||||
Problems.get_attached_warnings t4 . should_equal [Invalid_Format.Error "bools" Boolean ["NO", "False", "True", "no", "oui"]]
|
||||
t3 = Table.new [["bools", ["1", "NO", "False", "True", "YES", "no", "oui", "0"]]]
|
||||
t4 = t3.parse_values opts_1 column_types=[Column_Type_Selection.Value 0 Boolean]
|
||||
t4.at "bools" . to_vector . should_equal [True, Nothing, Nothing, Nothing, True, Nothing, Nothing, False]
|
||||
Problems.get_attached_warnings t4 . should_equal [Invalid_Format.Error "bools" Boolean ["NO", "False", "True", "no", "oui"]]
|
||||
|
||||
whitespace_table =
|
||||
ints = ["ints", ["0", "1 ", "0 1", " 2"]]
|
||||
floats = ["floats", ["0 ", " 2.0", "- 1", "10.0"]]
|
||||
bools = ["bools", ["True ", " false", "t rue", "False"]]
|
||||
dates = ["dates", [" 2022-01-01", "2022-07-17 ", "2022 - 07 - 17", ""]]
|
||||
datetimes = ["datetimes", [" 2022-01-01 11:59:00 ", Nothing, "2022 - 07 - 17 1:2:3 ", "2022-01-01 11:59:00"]]
|
||||
times = ["times", ["11:00:00 ", " 00:00:00", "00 : 00 : 00", Nothing]]
|
||||
Table.new [ints, floats, bools, dates, datetimes, times]
|
||||
whitespace_table =
|
||||
ints = ["ints", ["0", "1 ", "0 1", " 2"]]
|
||||
floats = ["floats", ["0 ", " 2.0", "- 1", "10.0"]]
|
||||
bools = ["bools", ["True ", " false", "t rue", "False"]]
|
||||
dates = ["dates", [" 2022-01-01", "2022-07-17 ", "2022 - 07 - 17", ""]]
|
||||
datetimes = ["datetimes", [" 2022-01-01 11:59:00 ", Nothing, "2022 - 07 - 17 1:2:3 ", "2022-01-01 11:59:00"]]
|
||||
times = ["times", ["11:00:00 ", " 00:00:00", "00 : 00 : 00", Nothing]]
|
||||
Table.new [ints, floats, bools, dates, datetimes, times]
|
||||
|
||||
Test.specify "should trim input values by default" <|
|
||||
t1 = whitespace_table.parse_values column_types=[Column_Type_Selection.Value 0 Integer, Column_Type_Selection.Value "floats" Decimal, Column_Type_Selection.Value "bools" Boolean, Column_Type_Selection.Value "dates" Date, Column_Type_Selection.Value "datetimes" Date_Time, Column_Type_Selection.Value "times" Time_Of_Day]
|
||||
t1.at "ints" . to_vector . should_equal [0, 1, Nothing, 2]
|
||||
t1.at "floats" . to_vector . should_equal [0.0, 2.0, Nothing, 10.0]
|
||||
t1.at "bools" . to_vector . should_equal [True, False, Nothing, False]
|
||||
t1.at "dates" . to_vector . should_equal [Date.new 2022 1 1, Date.new 2022 7 17, Nothing, Nothing]
|
||||
t1.at "datetimes" . to_vector . should_equal [Date_Time.new 2022 1 1 11 59, Nothing, Nothing, Nothing]
|
||||
t1.at "times" . to_vector . should_equal [Time_Of_Day.new 11 0 0, Time_Of_Day.new, Nothing, Nothing]
|
||||
warnings = Problems.get_attached_warnings t1
|
||||
expected_warnings = Vector.new_builder
|
||||
expected_warnings.append (Invalid_Format.Error "ints" Integer ["0 1"])
|
||||
expected_warnings.append (Invalid_Format.Error "floats" Decimal ["- 1"])
|
||||
expected_warnings.append (Invalid_Format.Error "bools" Boolean ["t rue"])
|
||||
expected_warnings.append (Invalid_Format.Error "dates" Date ["2022 - 07 - 17", ""])
|
||||
expected_warnings.append (Invalid_Format.Error "datetimes" Date_Time ["2022 - 07 - 17 1:2:3", "2022-01-01 11:59:00"])
|
||||
expected_warnings.append (Invalid_Format.Error "times" Time_Of_Day ["00 : 00 : 00"])
|
||||
warnings.should_contain_the_same_elements_as expected_warnings.to_vector
|
||||
Test.specify "should trim input values by default" <|
|
||||
t1 = whitespace_table.parse_values column_types=[Column_Type_Selection.Value 0 Integer, Column_Type_Selection.Value "floats" Decimal, Column_Type_Selection.Value "bools" Boolean, Column_Type_Selection.Value "dates" Date, Column_Type_Selection.Value "datetimes" Date_Time, Column_Type_Selection.Value "times" Time_Of_Day]
|
||||
t1.at "ints" . to_vector . should_equal [0, 1, Nothing, 2]
|
||||
t1.at "floats" . to_vector . should_equal [0.0, 2.0, Nothing, 10.0]
|
||||
t1.at "bools" . to_vector . should_equal [True, False, Nothing, False]
|
||||
t1.at "dates" . to_vector . should_equal [Date.new 2022 1 1, Date.new 2022 7 17, Nothing, Nothing]
|
||||
t1.at "datetimes" . to_vector . should_equal [Date_Time.new 2022 1 1 11 59, Nothing, Nothing, Nothing]
|
||||
t1.at "times" . to_vector . should_equal [Time_Of_Day.new 11 0 0, Time_Of_Day.new, Nothing, Nothing]
|
||||
warnings = Problems.get_attached_warnings t1
|
||||
expected_warnings = Vector.new_builder
|
||||
expected_warnings.append (Invalid_Format.Error "ints" Integer ["0 1"])
|
||||
expected_warnings.append (Invalid_Format.Error "floats" Decimal ["- 1"])
|
||||
expected_warnings.append (Invalid_Format.Error "bools" Boolean ["t rue"])
|
||||
expected_warnings.append (Invalid_Format.Error "dates" Date ["2022 - 07 - 17", ""])
|
||||
expected_warnings.append (Invalid_Format.Error "datetimes" Date_Time ["2022 - 07 - 17 1:2:3", "2022-01-01 11:59:00"])
|
||||
expected_warnings.append (Invalid_Format.Error "times" Time_Of_Day ["00 : 00 : 00"])
|
||||
warnings.should_contain_the_same_elements_as expected_warnings.to_vector
|
||||
|
||||
Test.specify "should fail to parse if whitespace is present and trimming is turned off" <|
|
||||
opts = Data_Formatter.Value trim_values=False
|
||||
t1 = whitespace_table.parse_values opts column_types=[Column_Type_Selection.Value 0 Integer, Column_Type_Selection.Value "floats" Decimal, Column_Type_Selection.Value "bools" Boolean, Column_Type_Selection.Value "dates" Date, Column_Type_Selection.Value "datetimes" Date_Time, Column_Type_Selection.Value "times" Time_Of_Day]
|
||||
t1.at "ints" . to_vector . should_equal [0, Nothing, Nothing, Nothing]
|
||||
t1.at "floats" . to_vector . should_equal [Nothing, Nothing, Nothing, 10.0]
|
||||
t1.at "bools" . to_vector . should_equal [Nothing, Nothing, Nothing, False]
|
||||
nulls = Vector.new 4 _->Nothing
|
||||
t1.at "dates" . to_vector . should_equal nulls
|
||||
t1.at "datetimes" . to_vector . should_equal nulls
|
||||
t1.at "times" . to_vector . should_equal nulls
|
||||
warnings = Problems.get_attached_warnings t1
|
||||
expected_warnings = Vector.new_builder
|
||||
expected_warnings.append (Invalid_Format.Error "ints" Integer ["1 ", "0 1", " 2"])
|
||||
expected_warnings.append (Invalid_Format.Error "floats" Decimal ["0 ", " 2.0", "- 1"])
|
||||
expected_warnings.append (Invalid_Format.Error "bools" Boolean ["True ", " false", "t rue"])
|
||||
expected_warnings.append (Invalid_Format.Error "dates" Date [" 2022-01-01", "2022-07-17 ", "2022 - 07 - 17", ""])
|
||||
expected_warnings.append (Invalid_Format.Error "datetimes" Date_Time [" 2022-01-01 11:59:00 ", "2022 - 07 - 17 1:2:3 ", "2022-01-01 11:59:00"])
|
||||
expected_warnings.append (Invalid_Format.Error "times" Time_Of_Day ["11:00:00 ", " 00:00:00", "00 : 00 : 00"])
|
||||
warnings.should_contain_the_same_elements_as expected_warnings.to_vector
|
||||
Test.specify "should fail to parse if whitespace is present and trimming is turned off" <|
|
||||
opts = Data_Formatter.Value trim_values=False
|
||||
t1 = whitespace_table.parse_values opts column_types=[Column_Type_Selection.Value 0 Integer, Column_Type_Selection.Value "floats" Decimal, Column_Type_Selection.Value "bools" Boolean, Column_Type_Selection.Value "dates" Date, Column_Type_Selection.Value "datetimes" Date_Time, Column_Type_Selection.Value "times" Time_Of_Day]
|
||||
t1.at "ints" . to_vector . should_equal [0, Nothing, Nothing, Nothing]
|
||||
t1.at "floats" . to_vector . should_equal [Nothing, Nothing, Nothing, 10.0]
|
||||
t1.at "bools" . to_vector . should_equal [Nothing, Nothing, Nothing, False]
|
||||
nulls = Vector.new 4 _->Nothing
|
||||
t1.at "dates" . to_vector . should_equal nulls
|
||||
t1.at "datetimes" . to_vector . should_equal nulls
|
||||
t1.at "times" . to_vector . should_equal nulls
|
||||
warnings = Problems.get_attached_warnings t1
|
||||
expected_warnings = Vector.new_builder
|
||||
expected_warnings.append (Invalid_Format.Error "ints" Integer ["1 ", "0 1", " 2"])
|
||||
expected_warnings.append (Invalid_Format.Error "floats" Decimal ["0 ", " 2.0", "- 1"])
|
||||
expected_warnings.append (Invalid_Format.Error "bools" Boolean ["True ", " false", "t rue"])
|
||||
expected_warnings.append (Invalid_Format.Error "dates" Date [" 2022-01-01", "2022-07-17 ", "2022 - 07 - 17", ""])
|
||||
expected_warnings.append (Invalid_Format.Error "datetimes" Date_Time [" 2022-01-01 11:59:00 ", "2022 - 07 - 17 1:2:3 ", "2022-01-01 11:59:00"])
|
||||
expected_warnings.append (Invalid_Format.Error "times" Time_Of_Day ["11:00:00 ", " 00:00:00", "00 : 00 : 00"])
|
||||
warnings.should_contain_the_same_elements_as expected_warnings.to_vector
|
||||
|
||||
Test.specify "should fallback to text if whitespace is present and trimming is turned off" <|
|
||||
c1 = ["1", " +2", "-123", Nothing]
|
||||
c2 = [" 1.0 ", "2.2", Nothing, "-1.0"]
|
||||
c3 = ["true", " False", Nothing, "True"]
|
||||
t = Table.new [["ints", c1], ["floats", c2], ["bools", c3]]
|
||||
t2 = t.parse_values (Data_Formatter.Value trim_values=False)
|
||||
Test.specify "should fallback to text if whitespace is present and trimming is turned off" <|
|
||||
c1 = ["1", " +2", "-123", Nothing]
|
||||
c2 = [" 1.0 ", "2.2", Nothing, "-1.0"]
|
||||
c3 = ["true", " False", Nothing, "True"]
|
||||
t = Table.new [["ints", c1], ["floats", c2], ["bools", c3]]
|
||||
t2 = t.parse_values (Data_Formatter.Value trim_values=False)
|
||||
|
||||
Warning.get_all t2 . should_equal []
|
||||
t2.at "ints" . to_vector . should_equal c1
|
||||
t2.at "floats" . to_vector . should_equal c2
|
||||
t2.at "bools" . to_vector . should_equal c3
|
||||
Warning.get_all t2 . should_equal []
|
||||
t2.at "ints" . to_vector . should_equal c1
|
||||
t2.at "floats" . to_vector . should_equal c2
|
||||
t2.at "bools" . to_vector . should_equal c3
|
||||
|
||||
Test.group "Column.parse" <|
|
||||
Test.specify "should correctly parse integers" <|
|
||||
c1 = Column.from_vector "ints" ["0", "+0", "-0", "+1", "-1", "1", "000", "0010", "12345", Nothing]
|
||||
c2 = c1.parse Integer
|
||||
c2.name.should_equal c1.name
|
||||
c2 . to_vector . should_equal [0, 0, 0, 1, -1, 1, Nothing, Nothing, 12345, Nothing]
|
||||
|
||||
Test.specify "should correctly parse decimals" <|
|
||||
c1 = Column.from_vector "ints" ["0", "+0", "-0", "+1", "-1", "1", "000", "0010", "12345", Nothing]
|
||||
c2 = c1.parse Decimal
|
||||
c2.name.should_equal c1.name
|
||||
c2 . to_vector . should_equal [0, 0, 0, 1, -1, 1, Nothing, Nothing, 12345, Nothing]
|
||||
c2.to_vector . map .to_text . should_equal ["0.0", "0.0", "-0.0", "1.0", "-1.0", "1.0", "Nothing", "Nothing", "12345.0", "Nothing"]
|
||||
|
||||
c3 = Column.from_vector "floats" ["0.0", "+0.0", "-0.0", "+1.0", "-1.0", "1.0", "0.0000", "10.", "12345."]
|
||||
c4 = c3.parse Decimal
|
||||
c4.to_vector . should_equal [0, 0, 0, 1, -1, 1, 0, 10, 12345]
|
||||
|
||||
c5 = Column.from_vector "floats" [".0", "0.", "1.", ".1", ".123", "-.1", "+.1", "+0.0", "0.1234", Nothing, "11111111.111"]
|
||||
c6 = c5.parse Decimal
|
||||
c6.to_vector . should_equal [0.0, 0.0, 1.0, 0.1, 0.123, -0.1, 0.1, 0.0, 0.1234, Nothing, 11111111.111]
|
||||
|
||||
Test.specify "should correctly parse booleans" <|
|
||||
c1 = Column.from_vector "bools" ["true", "false", "True", "TRUE", "FALSE", Nothing, "False"]
|
||||
c2 = c1.parse Boolean
|
||||
c2.name.should_equal c1.name
|
||||
c2.to_vector . should_equal [True, False, True, True, False, Nothing, False]
|
||||
c1.parse . to_vector . should_equal [True, False, True, True, False, Nothing, False]
|
||||
|
||||
c3 = Column.from_vector "bools" ["yes", "no", Nothing]
|
||||
c4 = c3.parse Boolean "yes|no"
|
||||
c4.to_vector . should_equal [True, False, Nothing]
|
||||
|
||||
Test.specify "should correctly parse date and time" <|
|
||||
c1 = Column.from_vector "date" ["2022-05-07", "2000-01-01", "2010-12-31"]
|
||||
c2 = c1.parse Date
|
||||
c2.to_vector . should_equal [Date.new 2022 5 7, Date.new 2000 1 1, Date.new 2010 12 31]
|
||||
|
||||
c3 = Column.from_vector "datetimes" ["2022-05-07 23:59:59", "2000-01-01 00:00:00", "2010-12-31 12:34:56"]
|
||||
c4 = c3.parse Date_Time
|
||||
c4.to_vector . should_equal [Date_Time.new 2022 5 7 23 59 59, Date_Time.new 2000 1 1, Date_Time.new 2010 12 31 12 34 56]
|
||||
|
||||
c5 = Column.from_vector "times" ["23:59:59", "00:00:00", "12:34:56"]
|
||||
c6 = c5.parse Time_Of_Day
|
||||
c6.to_vector . should_equal [Time_Of_Day.new 23 59 59, Time_Of_Day.new, Time_Of_Day.new 12 34 56]
|
||||
|
||||
Test.specify "should correctly parse date and time with format" <|
|
||||
c1 = Column.from_vector "date" ["5/7/2022", "1/1/2000", "12/31/2010"]
|
||||
c2 = c1.parse Date "M/d/yyyy"
|
||||
c2.to_vector . should_equal [Date.new 2022 5 7, Date.new 2000 1 1, Date.new 2010 12 31]
|
||||
|
||||
c3 = Column.from_vector "datetimes" ["5/7/2022 23:59:59", "1/1/2000 00:00:00", "12/31/2010 12:34:56"]
|
||||
c4 = c3.parse Date_Time "M/d/yyyy HH:mm:ss"
|
||||
c4.to_vector . should_equal [Date_Time.new 2022 5 7 23 59 59, Date_Time.new 2000 1 1, Date_Time.new 2010 12 31 12 34 56]
|
||||
|
||||
main = Test_Suite.run_main spec
|
||||
|
@ -844,7 +844,7 @@ spec =
|
||||
|
||||
t.filter "X" (Filter_Condition.Is_In in_vector) on_problems=Report_Error . at "X" . to_vector . should_equal expected_vector
|
||||
t.filter "X" (Filter_Condition.Is_In in_column) on_problems=Report_Error . at "X" . to_vector . should_equal expected_vector
|
||||
t2 = t.set "Y" (t.at "X" . not)
|
||||
t2 = t.set (t.at "X" . not) new_name="Y"
|
||||
t2.filter "Y" (Filter_Condition.Is_In in_vector) on_problems=Report_Error . at "Y" . to_vector . should_equal expected_neg_vector
|
||||
t2.filter "Y" (Filter_Condition.Is_In in_column) on_problems=Report_Error . at "Y" . to_vector . should_equal expected_neg_vector
|
||||
|
||||
|
@ -111,11 +111,23 @@ spec = Test.group "List" <|
|
||||
|
||||
txt = ["aaa", "bbb", "abab", "cccc", "baaa", "ś"].to_list
|
||||
txt.filter (Filter_Condition.Contains "a") . should_equal ["aaa", "abab", "baaa"].to_list
|
||||
txt.filter (Filter_Condition.Contains 'A' Case_Sensitivity.Sensitive) . should_equal [].to_list
|
||||
txt.filter (Filter_Condition.Contains 'A' Case_Sensitivity.Insensitive) . should_equal ["aaa", "abab", "baaa"].to_list
|
||||
txt.filter (Filter_Condition.Contains 's\u0301') . should_equal ["ś"].to_list
|
||||
txt.filter (Filter_Condition.Contains 'S\u0301' Case_Sensitivity.Sensitive) . should_equal [].to_list
|
||||
txt.filter (Filter_Condition.Contains 'S\u0301' Case_Sensitivity.Insensitive) . should_equal ["ś"].to_list
|
||||
txt.filter (Filter_Condition.Not_Contains "a") . should_equal ["bbb", "cccc", "ś"].to_list
|
||||
txt.filter (Filter_Condition.Not_Contains "A" Case_Sensitivity.Sensitive) . should_equal ["aaa", "bbb", "abab", "cccc", "baaa", "ś"].to_list
|
||||
txt.filter (Filter_Condition.Not_Contains "A" Case_Sensitivity.Insensitive) . should_equal ["bbb", "cccc", "ś"].to_list
|
||||
txt.filter (Filter_Condition.Not_Contains 's\u0301') . should_equal ["aaa", "bbb", "abab", "cccc", "baaa"].to_list
|
||||
txt.filter (Filter_Condition.Not_Contains 'S\u0301' Case_Sensitivity.Sensitive) . should_equal ["aaa", "bbb", "abab", "cccc", "baaa", "ś"].to_list
|
||||
txt.filter (Filter_Condition.Not_Contains 'S\u0301' Case_Sensitivity.Insensitive) . should_equal ["aaa", "bbb", "abab", "cccc", "baaa"].to_list
|
||||
txt.filter (Filter_Condition.Starts_With "a") . should_equal ["aaa", "abab"].to_list
|
||||
txt.filter (Filter_Condition.Starts_With "A" Case_Sensitivity.Sensitive) . should_equal [].to_list
|
||||
txt.filter (Filter_Condition.Starts_With "A" Case_Sensitivity.Insensitive) . should_equal ["aaa", "abab"].to_list
|
||||
txt.filter (Filter_Condition.Ends_With "a") . should_equal ["aaa", "baaa"].to_list
|
||||
txt.filter (Filter_Condition.Ends_With "A" Case_Sensitivity.Sensitive) . should_equal [].to_list
|
||||
txt.filter (Filter_Condition.Ends_With "A" Case_Sensitivity.Insensitive) . should_equal ["aaa", "baaa"].to_list
|
||||
txt.filter (Filter_Condition.Less than="a") . should_equal List.Nil
|
||||
txt.filter (Filter_Condition.Greater than="b") . should_equal ["bbb", "cccc", "baaa", "ś"].to_list
|
||||
txt.filter (Filter_Condition.Between "b" "c") . should_equal ["bbb", "baaa"].to_list
|
||||
|
@ -269,8 +269,8 @@ spec =
|
||||
foo
|
||||
bar\r\tbaz
|
||||
text_1.pretty.should_equal "'foo\nbar\r\tbaz'"
|
||||
text_2 = '\n\t\a\b\f\r\v\e\''
|
||||
text_2.pretty.should_equal "'\n\t\a\b\f\r\v\e\''"
|
||||
text_2 = '\n\t\a\b\f\r\v\e\'\\'
|
||||
text_2.pretty.should_equal "'\n\t\a\b\f\r\v\e\'\\'"
|
||||
|
||||
Test.specify "should return text as is when converting to text" <|
|
||||
text_1 = '''
|
||||
|
@ -50,8 +50,8 @@ foreign python generate_nested_py_array = """
|
||||
return [[1, 2, 3], [4, 5]]
|
||||
|
||||
spec = Test.group "Vectors" <|
|
||||
pending_python_missing = if Polyglot.is_language_installed "python" then Nothing else """
|
||||
Can't run Python tests, Python is not installed.
|
||||
pending_python_missing = if Polyglot.is_language_installed "python" then Nothing else
|
||||
"Can't run Python tests, Python is not installed."
|
||||
|
||||
Test.specify "text bytes" <|
|
||||
"Lore".utf_8 . should_equal [76, 111, 114, 101]
|
||||
@ -191,11 +191,23 @@ spec = Test.group "Vectors" <|
|
||||
|
||||
txtvec = ["aaa", "bbb", "abab", "cccc", "baaa", "ś"]
|
||||
txtvec.filter (Filter_Condition.Contains "a") . should_equal ["aaa", "abab", "baaa"]
|
||||
txtvec.filter (Filter_Condition.Contains 'A' Case_Sensitivity.Sensitive) . should_equal []
|
||||
txtvec.filter (Filter_Condition.Contains 'A' Case_Sensitivity.Insensitive) . should_equal ["aaa", "abab", "baaa"]
|
||||
txtvec.filter (Filter_Condition.Contains 's\u0301') . should_equal ["ś"]
|
||||
txtvec.filter (Filter_Condition.Contains 'S\u0301' Case_Sensitivity.Sensitive) . should_equal []
|
||||
txtvec.filter (Filter_Condition.Contains 'S\u0301' Case_Sensitivity.Insensitive) . should_equal ["ś"]
|
||||
txtvec.filter (Filter_Condition.Not_Contains "a") . should_equal ["bbb", "cccc", "ś"]
|
||||
txtvec.filter (Filter_Condition.Not_Contains "A" Case_Sensitivity.Sensitive) . should_equal ["aaa", "bbb", "abab", "cccc", "baaa", "ś"]
|
||||
txtvec.filter (Filter_Condition.Not_Contains "A" Case_Sensitivity.Insensitive) . should_equal ["bbb", "cccc", "ś"]
|
||||
txtvec.filter (Filter_Condition.Not_Contains 's\u0301') . should_equal ["aaa", "bbb", "abab", "cccc", "baaa"]
|
||||
txtvec.filter (Filter_Condition.Not_Contains 'S\u0301' Case_Sensitivity.Sensitive) . should_equal ["aaa", "bbb", "abab", "cccc", "baaa", "ś"]
|
||||
txtvec.filter (Filter_Condition.Not_Contains 'S\u0301' Case_Sensitivity.Insensitive) . should_equal ["aaa", "bbb", "abab", "cccc", "baaa"]
|
||||
txtvec.filter (Filter_Condition.Starts_With "a") . should_equal ["aaa", "abab"]
|
||||
txtvec.filter (Filter_Condition.Starts_With "A" Case_Sensitivity.Sensitive) . should_equal []
|
||||
txtvec.filter (Filter_Condition.Starts_With "A" Case_Sensitivity.Insensitive) . should_equal ["aaa", "abab"]
|
||||
txtvec.filter (Filter_Condition.Ends_With "a") . should_equal ["aaa", "baaa"]
|
||||
txtvec.filter (Filter_Condition.Ends_With "A" Case_Sensitivity.Sensitive) . should_equal []
|
||||
txtvec.filter (Filter_Condition.Ends_With "A" Case_Sensitivity.Insensitive) . should_equal ["aaa", "baaa"]
|
||||
txtvec.filter (Filter_Condition.Less than="a") . should_equal []
|
||||
txtvec.filter (Filter_Condition.Greater than="b") . should_equal ["bbb", "cccc", "baaa", "ś"]
|
||||
txtvec.filter (Filter_Condition.Between "b" "c") . should_equal ["bbb", "baaa"]
|
||||
|
@ -37,7 +37,7 @@ spec =
|
||||
Widgets.get_full_annotations_json mock_table "at" ["selector"] . should_equal expect
|
||||
|
||||
Test.specify "works for `filter`" <|
|
||||
expect = [["column", Widget.Single_Choice (mock_table.column_names.map .pretty) Nothing Display.Always], ["filter", Nothing], ["on_problems", Nothing]] . to_json
|
||||
Widgets.get_full_annotations_json mock_table "filter" ["column", "filter", "on_problems"] . should_equal expect
|
||||
expect = [["column", Widget.Single_Choice (mock_table.column_names.map .pretty) Nothing Display.Always]] . to_json
|
||||
Widgets.get_full_annotations_json mock_table "filter" ["column"] . should_equal expect
|
||||
|
||||
main = Test_Suite.run_main spec
|
||||
|
@ -15,12 +15,12 @@ spec =
|
||||
mock_table = Table.from_rows ["A", "B C", 'D "E"', "F.G/H\I"] []
|
||||
|
||||
Test.specify "works for `get` and `at`" <|
|
||||
expect = [["selector", Widget.Single_Choice (mock_table.column_names.map .pretty) Nothing Display.Always]] . to_json
|
||||
expect = [["selector", Widget.Single_Choice ["'A'", "'B C'", '\'D "E"\'', "'F.G/H\\I'"] Nothing Display.Always]] . to_json
|
||||
Widgets.get_full_annotations_json mock_table "get" ["selector"] . should_equal expect
|
||||
Widgets.get_full_annotations_json mock_table "at" ["selector"] . should_equal expect
|
||||
|
||||
Test.specify "works for `filter`" <|
|
||||
expect = [["column", Widget.Single_Choice (mock_table.column_names.map .pretty) Nothing Display.Always], ["filter", Nothing], ["on_problems", Nothing]] . to_json
|
||||
Widgets.get_full_annotations_json mock_table "filter" ["column", "filter", "on_problems"] . should_equal expect
|
||||
expect = [["column", Widget.Single_Choice ["'A'", "'B C'", '\'D "E"\'', "'F.G/H\\I'"] Nothing Display.Always]] . to_json
|
||||
Widgets.get_full_annotations_json mock_table "filter" ["column"] . should_equal expect
|
||||
|
||||
main = Test_Suite.run_main spec
|
||||
|
Loading…
Reference in New Issue
Block a user