Table with no columns is not valid, No_Output_Columns is always an error (#4073)

Implements https://www.pivotaltracker.com/story/show/184226020
This commit is contained in:
Radosław Waśko 2023-01-25 03:40:23 +01:00 committed by GitHub
parent 366f231765
commit 778d28fba3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
30 changed files with 216 additions and 193 deletions

View File

@ -1061,7 +1061,7 @@ type Builder
unsafe_append : Any -> Nothing
unsafe_append self item = self.java_builder.add item
## Gets an element from the vector at a specified index (0-based).
## Gets an element from the vector builder at a specified index (0-based).
Arguments:
- index: The location in the vector to get the element from. The index is

View File

@ -58,7 +58,7 @@ type Warning
attached warnings are unaffected.
mapper warning = case warning of
Matching.No_Matches_Found criteria -> Maybe.Some (Missing_Input_Columns criteria)
Matching.No_Matches_Found criteria -> Maybe.Some (Missing_Input_Columns.Error criteria)
_ -> Nothing
Warning.map_attached_warnings mapper value
map_attached_warnings : (Any -> Maybe Any) -> Any -> Any
@ -80,7 +80,7 @@ type Warning
other attached warnings or errors are unaffected.
mapper error = case error of
Matching.No_Matches_Found criteria -> Maybe.Some (Missing_Input_Columns criteria)
Matching.No_Matches_Found criteria -> Maybe.Some (Missing_Input_Columns.Error criteria)
_ -> Nothing
Warning.map_warnings_and_errors mapper value
map_warnings_and_errors : (Any -> Maybe Any) -> Any -> Any

View File

@ -141,7 +141,9 @@ type Table
- If two distinct indices would refer to the same column, an
`Input_Indices_Already_Matched`, indicating that the additional
indices will not introduce additional columns.
- If there are no columns in the output table, a `No_Output_Columns`.
- If there are no columns in the output table, a `No_Output_Columns` is
raised as an error regardless of the problem behavior, because it is
not possible to create a table without any columns.
> Example
Select columns by name.
@ -193,7 +195,9 @@ type Table
- If two distinct indices would refer to the same column, an
`Input_Indices_Already_Matched`, indicating that the additional
indices will not introduce additional columns.
- If there are no columns in the output table, a `No_Output_Columns`.
- If there are no columns in the output table, a `No_Output_Columns` is
raised as an error regardless of the problem behavior, because it is
not possible to create a table without any columns.
> Example
Remove columns with given names.
@ -670,17 +674,14 @@ type Table
`Floating_Point_Grouping` warning.
distinct : Vector Text | Column_Selector -> Case_Sensitivity -> Problem_Behavior -> Table
distinct self (columns = Column_Selector.By_Name (self.columns.map .name)) case_sensitivity=Case_Sensitivity.Sensitive on_problems=Report_Warning =
problem_builder = Problem_Builder.new
warning_mapper error = case error of
No_Output_Columns -> Maybe.Some No_Input_Columns_Selected
_ -> Nothing
key_columns = Warning.map_warnings_and_errors warning_mapper <|
self.columns_helper.select_columns selector=columns reorder=True on_problems=on_problems
key_columns = self.columns_helper.select_columns selector=columns reorder=True on_problems=Problem_Behavior.Report_Error . catch No_Output_Columns _->
Error.throw No_Input_Columns_Selected
text_case_insensitive = case case_sensitivity of
Case_Sensitivity.Sensitive -> False
Case_Sensitivity.Insensitive locale ->
Helpers.assume_default_locale locale <|
True
problem_builder = Problem_Builder.new
new_table = self.connection.dialect.prepare_distinct self key_columns text_case_insensitive problem_builder
problem_builder.attach_problems_before on_problems new_table
@ -982,7 +983,9 @@ type Table
The following problems can occur:
- If a column name is not in the input table, a `Missing_Input_Columns`.
- If a column index is out of range, a `Column_Indexes_Out_Of_Range`.
- If there are no valid columns in the output table, a `No_Output_Columns`.
- If there are no columns in the output table, a `No_Output_Columns` is
raised as an error regardless of the problem behavior, because it is
not possible to create a table without any columns.
- If there are invalid column names in the output table, a `Invalid_Output_Column_Names`.
- If there are duplicate column names in the output table, a `Duplicate_Output_Column_Names`.
- If grouping on or computing the `Mode` on a floating point number, a `Floating_Point_Grouping`.
@ -1343,8 +1346,9 @@ type Table
# make_table : Connection -> Text -> Vector [Text, SQL_Type] -> Context -> Table
make_table : Connection -> Text -> Vector -> Context -> Table
make_table connection table_name columns ctx =
cols = columns.map (p -> Internal_Column.Value p.first p.second (SQL_Expression.Column table_name p.first))
Table.Value table_name connection cols ctx
if columns.is_empty then Error.throw (Illegal_State.Error "Unexpectedly attempting to create a Database Table with no columns. This is a bug in the Database library.") else
cols = columns.map (p -> Internal_Column.Value p.first p.second (SQL_Expression.Column table_name p.first))
Table.Value table_name connection cols ctx
## PRIVATE

View File

@ -79,11 +79,10 @@ type Table
case c of
_ : Vector -> Column.from_vector (c.at 0) (c.at 1) . java_column
Column.Value java_col -> java_col
# TODO enable this once we stop returning tables without columns
# if cols.is_empty then Error.throw (Illegal_Argument.Error "Cannot create a table with no columns.") else
if (cols.all c-> c.getSize == cols.first.getSize).not then Error.throw (Illegal_Argument.Error "All columns must have the same row count.") else
if cols.distinct .getName . length != cols.length then Error.throw (Illegal_Argument.Error "Column names must be distinct.") else
Table.Value (Java_Table.new cols.to_array)
if cols.is_empty then Error.throw (Illegal_Argument.Error "Cannot create a table with no columns.") else
if (cols.all c-> c.getSize == cols.first.getSize).not then Error.throw (Illegal_Argument.Error "All columns must have the same row count.") else
if cols.distinct .getName . length != cols.length then Error.throw (Illegal_Argument.Error "Column names must be distinct.") else
Table.Value (Java_Table.new cols.to_array)
## Creates a new table from a vector of column names and a vector of vectors
specifying row contents.
@ -256,7 +255,9 @@ type Table
- If two distinct indices refer to the same column, an
`Input_Indices_Already_Matched`, with the column included the first
time it is matched.
- If there are no columns in the output table, a `No_Output_Columns`.
- If there are no columns in the output table, a `No_Output_Columns` is
raised as an error regardless of the problem behavior, because it is
not possible to create a table without any columns.
> Example
Select columns by name.
@ -308,7 +309,9 @@ type Table
- If two distinct indices refer to the same column, an
`Input_Indices_Already_Matched`, with the column included the first
time it is matched.
- If there are no columns in the output table, a `No_Output_Columns`.
- If there are no columns in the output table, a `No_Output_Columns` is
raised as an error regardless of the problem behavior, because it is
not possible to create a table without any columns.
> Example
Remove columns with given names.
@ -515,7 +518,9 @@ type Table
The following problems can occur:
- If a column name is not in the input table, a `Missing_Input_Columns`.
- If a column index is out of range, a `Column_Indexes_Out_Of_Range`.
- If there are no valid columns in the output table, a `No_Output_Columns`.
- If there are no columns in the output table, a `No_Output_Columns` is
raised as an error regardless of the problem behavior, because it is
not possible to create a table without any columns.
- If there are invalid column names in the output table, a `Invalid_Output_Column_Names`.
- If there are duplicate column names in the output table, a `Duplicate_Output_Column_Names`.
- If grouping on or computing the `Mode` on a floating point number, a `Floating_Point_Grouping`.
@ -676,11 +681,8 @@ type Table
`Floating_Point_Grouping` warning.
distinct : Vector Text | Column_Selector -> Case_Sensitivity -> Problem_Behavior -> Table
distinct self (columns = Column_Selector.By_Name (self.columns.map .name)) case_sensitivity=Case_Sensitivity.Sensitive on_problems=Report_Warning =
warning_mapper error = case error of
No_Output_Columns -> Maybe.Some No_Input_Columns_Selected
_ -> Nothing
key_columns = Warning.map_warnings_and_errors warning_mapper <|
self.columns_helper.select_columns selector=columns reorder=True on_problems=on_problems
key_columns = self.columns_helper.select_columns selector=columns reorder=True on_problems=Problem_Behavior.Report_Error . catch No_Output_Columns _->
Error.throw No_Input_Columns_Selected
java_columns = key_columns.map .java_column
text_folding_strategy = Case_Sensitivity.folding_strategy case_sensitivity
java_table = Illegal_Argument.handle_java_exception <|

View File

@ -12,9 +12,12 @@ import project.Delimited.Quote_Style.Quote_Style
type Delimited_Format
## Read delimited files such as CSVs into a Table.
If a row does not match the first row's column count, the function raises
an `Invalid_Row`. If a quote is opened and never closed, a
`Mismatched_Quote` warning occurs.
When reading this format, the following problems may occur:
- If a row does not match the first row's column count, the function
raises an `Invalid_Row`.
- If a quote is opened and never closed, a `Mismatched_Quote` problem is
reported.
- If an empty file is passed, an `Empty_File_Error` is thrown.
Arguments:
- delimiter: The delimiter character to split the file into columns. An

View File

@ -8,7 +8,7 @@ import project.Data.Data_Formatter.Data_Formatter
import project.Delimited.Delimited_Format.Delimited_Format
import project.Delimited.Quote_Style.Quote_Style
import project.Internal.Java_Problems
from project.Errors import Additional_Warnings, Parser_Error
from project.Errors import Additional_Warnings, Parser_Error, Empty_File_Error
polyglot java import org.enso.base.encoding.NewlineDetector
polyglot java import org.enso.table.read.DelimitedReader
@ -85,7 +85,7 @@ read_stream format stream on_problems max_columns=default_max_columns related_fi
integer.
read_from_reader : Delimited_Format -> Reader -> Problem_Behavior -> Integer -> Any
read_from_reader format java_reader on_problems max_columns=4096 =
Illegal_Argument.handle_java_exception <| handle_parsing_failure <| handle_parsing_exception <|
Illegal_Argument.handle_java_exception <| handle_parsing_failure <| handle_parsing_exception <| Empty_File_Error.handle_java_exception <|
reader = prepare_reader java_reader format max_columns on_problems
result_with_problems = reader.read
parsing_problems = Vector.from_polyglot_array (result_with_problems.problems) . map Java_Problems.translate_problem

View File

@ -2,6 +2,9 @@ from Standard.Base import all
polyglot java import org.enso.table.error.ColumnCountMismatchException
polyglot java import org.enso.table.error.ColumnNameMismatchException
polyglot java import org.enso.table.error.EmptyFileException
polyglot java import org.enso.table.error.EmptySheetException
type Missing_Input_Columns
## PRIVATE
@ -215,6 +218,26 @@ type Leading_Zeros
to_display_text : Text
to_display_text self = "Leading zeros in column "+self.column+" with datatype "+self.datatype.to_text+"."
## Indicates that an empty file was encountered, so no data could be loaded.
type Empty_File_Error
to_display_text : Text
to_display_text = "It is not allowed to create a Table with no columns, so an empty file could not have been loaded."
## PRIVATE
handle_java_exception =
Panic.catch EmptyFileException handler=(_ -> Error.throw Empty_File_Error)
## Indicates that an empty sheet was encountered, so no data could be loaded.
type Empty_Sheet_Error
to_display_text : Text
to_display_text = "It is not allowed to create a Table with no columns, so an empty sheet could not have been loaded."
## PRIVATE
handle_java_exception =
Panic.catch EmptySheetException handler=(_ -> Error.throw Empty_Sheet_Error)
## Indicates that multiple `Column_Type_Selector` match the same column.
If all matching selectors indicate the same type, the warning is reported but

View File

@ -6,7 +6,7 @@ import project.Data.Table.Table
import project.Excel.Excel_Range.Excel_Range
import project.Excel.Excel_Section.Excel_Section
from project.Errors import Invalid_Location, Duplicate_Output_Column_Names, Invalid_Output_Column_Names
from project.Errors import Invalid_Location, Duplicate_Output_Column_Names, Invalid_Output_Column_Names, Empty_Sheet_Error
polyglot java import org.enso.table.excel.ExcelHeaders
polyglot java import org.enso.table.read.ExcelReader
@ -45,7 +45,7 @@ handle_reader file reader =
bad_argument caught_panic = Error.throw (Invalid_Location.Error caught_panic.payload.getCause)
handle_bad_argument = Panic.catch InvalidLocationException handler=bad_argument
File_Error.handle_java_exceptions file <| handle_bad_argument <| handle_bad_format <|
File_Error.handle_java_exceptions file <| handle_bad_argument <| handle_bad_format <| Empty_Sheet_Error.handle_java_exception <|
file.with_input_stream [File_Access.Read] stream->
stream.with_java_stream reader

View File

@ -70,11 +70,11 @@ prepare_aggregate_columns aggregates table =
Pair.new new_name agg
# Build Problems Output
if renamed_columns.is_empty then
problem_builder.report_other_warning No_Output_Columns
problem_builder.report_unique_name_strategy unique
Validated_Aggregate_Columns.Value unique_key_columns renamed_columns problem_builder.build_problemset
case renamed_columns.is_empty of
True -> Error.throw No_Output_Columns
False ->
problem_builder.report_unique_name_strategy unique
Validated_Aggregate_Columns.Value unique_key_columns renamed_columns problem_builder.build_problemset
## PRIVATE
Defines the default name of an `Aggregate_Column`.

View File

@ -2,13 +2,11 @@ from Standard.Base import all
import Standard.Base.Error.Common.Index_Out_Of_Bounds
import Standard.Base.Error.Illegal_State.Illegal_State
from project.Errors import Invalid_Value_Type
from project.Errors import Invalid_Value_Type, No_Such_Column, Missing_Input_Columns, Column_Indexes_Out_Of_Range
import project.Data.Join_Condition.Join_Condition
import project.Data.Value_Type.Value_Type
import project.Internal.Problem_Builder.Problem_Builder
import project.Errors.No_Such_Column
type Join_Condition_Resolver
## PRIVATE
Value left_at right_at make_equals make_equals_ignore_case make_between
@ -25,7 +23,7 @@ type Join_Condition_Resolver
resolve : Join_Condition | Text | Vector (Join_Condition | Text) -> Problem_Behavior -> Join_Condition_Resolution
resolve self conditions on_problems =
redundant_names = Vector.new_builder
problem_builder = Problem_Builder.new
problem_builder = Problem_Builder.new types_to_always_throw=[Missing_Input_Columns, Column_Indexes_Out_Of_Range]
resolve_selector resolver selector =
r_1 = resolver selector
@ -66,10 +64,9 @@ type Join_Condition_Resolver
right_upper = resolve_right right_upper_selector
if left.is_nothing || right_lower.is_nothing || right_upper.is_nothing then Nothing else
self.make_between problem_builder left right_lower right_upper
problem_builder.throw_unmatched_columns_as_error <|
problem_builder.attach_problems_before on_problems <|
if converted.contains Nothing then Panic.throw (Illegal_State.Error "Impossible: unresolved columns remaining in the join resolution. This should have raised a dataflow error. This is a bug in the Table library.") else
problem_builder.attach_problems_after on_problems <|
Join_Condition_Resolution.Result converted redundant_names.to_vector
Join_Condition_Resolution.Result converted redundant_names.to_vector
type Join_Condition_Resolution
Result (conditions : Vector Any) (redundant_column_names : Vector Text)

View File

@ -6,7 +6,7 @@ import project.Internal.Vector_Builder.Vector_Builder
from project.Errors import Missing_Input_Columns, Column_Indexes_Out_Of_Range, Duplicate_Column_Selectors, Input_Indices_Already_Matched, Column_Matched_By_Multiple_Selectors, Duplicate_Output_Column_Names, Invalid_Output_Column_Names
type Problem_Builder
Value oob_indices duplicate_column_selectors input_indices_already_matched missing_input_columns other
Value types_to_always_throw oob_indices duplicate_column_selectors input_indices_already_matched missing_input_columns other
report_oob_indices self indices =
append_to_ref self.oob_indices indices
@ -41,48 +41,55 @@ type Problem_Builder
if vec.not_empty then
problems.append (problem_creator vec)
build_vector_and_append self.missing_input_columns Missing_Input_Columns.Error
build_vector_and_append self.oob_indices Column_Indexes_Out_Of_Range.Error
build_vector_and_append self.duplicate_column_selectors Duplicate_Column_Selectors.Error
build_vector_and_append self.input_indices_already_matched Input_Indices_Already_Matched.Error
build_vector_and_append self.missing_input_columns Missing_Input_Columns.Error
self.other.to_vector.each problems.append
problems.to_vector
## If there are any missing columns or out of bound indices, the
corresponding error is raised, otherwise, the provided action is executed
and its result is returned.
throw_unmatched_columns_as_error : Any -> Any
throw_unmatched_columns_as_error self ~action =
case self.missing_input_columns.get.not_empty of
True ->
err = Missing_Input_Columns.Error self.missing_input_columns.get.build
Error.throw err
False -> case self.oob_indices.get.not_empty of
True ->
err = Column_Indexes_Out_Of_Range.Error self.oob_indices.get.build
Error.throw err
False -> action
## Attaches gathered warnings to the result.
Any errors from the `result` take precedence over the ones owned by this
builder.
attach_problems_after : Problem_Behavior -> Any -> Any
attach_problems_after self problem_behavior result =
problem_behavior.attach_problems_after result self.build_problemset
case result of
_ -> case self.get_problemset_throwing_distinguished_errors of
problems ->
problem_behavior.attach_problems_after result problems
## Attaches gathered warnings to the result of the provided computation.
If in `Report_Error` mode and there are any problems gathered, the first
one will be returned as error without even running the computation.
attach_problems_before : Problem_Behavior -> Any -> Any
attach_problems_before self problem_behavior ~computation =
problem_behavior.attach_problems_before self.build_problemset computation
case self.get_problemset_throwing_distinguished_errors of
problems ->
problem_behavior.attach_problems_before problems computation
## PRIVATE
Returns the generated problem set if no errors force throwing or throws
the first error that is meant to be thrown regardless of problem behavior.
get_problemset_throwing_distinguished_errors : Vector
get_problemset_throwing_distinguished_errors self =
problems = self.build_problemset
distinguished_problem = problems.find if_missing=Nothing problem->
self.types_to_always_throw.any tpe->
problem.is_a tpe
case distinguished_problem of
Nothing -> problems
problem -> Error.throw problem
## PRIVATE
Creates a new helper object for aggregating problems to report.
new : Problem_Builder
new =
Problem_Builder.Value (Ref.new Vector_Builder.empty) (Ref.new Vector_Builder.empty) (Ref.new Vector_Builder.empty) (Ref.new Vector_Builder.empty) other=Vector.new_builder
An optional list of types can be provided. Problems matching these
types will be raised as an error by the `attach_` methods regardless of
the `Problem_Behavior` used.
new : Vector -> Problem_Builder
new types_to_always_throw=[] =
Problem_Builder.Value types_to_always_throw (Ref.new Vector_Builder.empty) (Ref.new Vector_Builder.empty) (Ref.new Vector_Builder.empty) (Ref.new Vector_Builder.empty) other=Vector.new_builder
## PRIVATE
Appends a `Vector` to a `Vector_Builder` stored in a `Ref`.

View File

@ -61,9 +61,8 @@ type Table_Column_Helper
select_columns self selector reorder on_problems =
problem_builder = Problem_Builder.new
result = self.select_columns_helper selector reorder problem_builder
if result.is_empty then
problem_builder.report_other_warning No_Output_Columns
problem_builder.attach_problems_after on_problems result
problem_builder.attach_problems_before on_problems <|
if result.is_empty then Error.throw No_Output_Columns else result
## PRIVATE
A helper function encapsulating shared code for `remove_columns`
@ -88,9 +87,8 @@ type Table_Column_Helper
result = self.internal_columns.filter column->
should_be_removed = selected_names.get column.name False
should_be_removed.not
if result.is_empty then
problem_builder.report_other_warning No_Output_Columns
problem_builder.attach_problems_after on_problems result
if result.is_empty then Error.throw No_Output_Columns else
problem_builder.attach_problems_after on_problems result
## PRIVATE
A helper function encapsulating shared code for `reorder_columns`
@ -168,14 +166,29 @@ type Table_Column_Helper
## We cannot just use a custom_column in the aggregate because of
how the column selector works. We may need to revisit this. For
now we need to use tricks like that:
modified_table = blanks.fold (self.table.select_columns [] on_problems=Problem_Behavior.Ignore) table-> blanks_col->
To be backend agnostic, we cannot create a new table with the
columns above just out of thin air (actually we may want to allow
this in the future if all columns come from the same context, but
currently it's not possible). Instead, we add our blank columns
to the current table and then remove any other columns we are not
interested in. Note that we do not have to care about potential
name conflicts, as we are dropping any other columns anyway, and
adding a new column with a clashing name will not affect any
other columns computed from the old column with that name.
table_with_blank_indicators = blanks.fold self.table table-> blanks_col->
table.set blanks_col.name blanks_col
just_indicators = table_with_blank_indicators.select_columns (blanks.map .name) on_problems=Problem_Behavior.Report_Error
# Maximum is equivalent to Exists and Minimum is equivalent to Forall.
col_aggregate = if when_any then Maximum _ else Minimum _
aggregates = blanks.map blanks_col-> col_aggregate blanks_col.name
result = self.materialize <| Panic.rethrow <|
modified_table.aggregate aggregates on_problems=Problem_Behavior.Report_Error
counts = result.rows.first
aggregate_result = just_indicators.aggregate aggregates on_problems=Problem_Behavior.Report_Error
materialized_result = self.materialize <| aggregate_result.catch Any error->
msg = "Unexpected dataflow error has been thrown in an `select_columns_helper`. This is a bug in the Table library. The unexpected error was: "+error.to_display_text
Panic.throw (Illegal_State.Error message=msg cause=error)
counts = materialized_result.rows.first
## The `reorder` argument has no meaning for Blank_Columns selector
- either way all blank columns are selected in the order that

View File

@ -5,13 +5,13 @@ import java.util.List;
public class Array_Builder<T> {
private static final Object[] EMPTY_ARRAY = new Object[0];
private final int capacity;
private final int initialCapacity;
private int size;
private Object primitiveArray;
private Object[] objectArray;
private Array_Builder(int capacity) {
this.capacity = Math.max(1, capacity);
private Array_Builder(int initialCapacity) {
this.initialCapacity = Math.max(1, initialCapacity);
}
/**
@ -79,15 +79,15 @@ public class Array_Builder<T> {
assert primitiveArray == null;
assert size == 0;
if (e instanceof Long l) {
var arr = new long[capacity];
var arr = new long[initialCapacity];
arr[0] = l;
primitiveArray = arr;
} else if (e instanceof Double d) {
var arr = new double[capacity];
var arr = new double[initialCapacity];
arr[0] = d;
primitiveArray = arr;
} else {
var arr = new Object[capacity];
var arr = new Object[initialCapacity];
arr[0] = e;
objectArray = arr;
}

View File

@ -39,18 +39,17 @@ public class Table {
}
public Table(Column[] columns, AggregatedProblems problems) {
if (columns.length == 0) {
throw new IllegalArgumentException("A Table must have at least one column.");
}
this.columns = columns;
this.problems = problems;
}
/** @return the number of rows in this table */
public int rowCount() {
// TODO I think we can make this check obsolete once we start requiring >=1 column in tables.
if (columns.length == 0) {
return 0;
} else {
return columns[0].getSize();
}
return columns[0].getSize();
}
/** @return the columns of this table */

View File

@ -0,0 +1,7 @@
package org.enso.table.error;
/**
 * Unchecked exception signalling that an empty file could not be parsed.
 *
 * <p>The exception carries a fixed message and takes no arguments.
 */
public class EmptyFileException extends RuntimeException {
  /** The fixed message reported by every instance of this exception. */
  private static final String MESSAGE = "Cannot parse an empty file.";

  /** Creates the exception with its fixed message. */
  public EmptyFileException() {
    super(MESSAGE);
  }
}

View File

@ -0,0 +1,7 @@
package org.enso.table.error;
/**
 * Unchecked exception signalling that an empty sheet could not be parsed.
 *
 * <p>The exception carries a fixed message and takes no arguments.
 */
public class EmptySheetException extends RuntimeException {
  /** The fixed message reported by every instance of this exception. */
  private static final String MESSAGE = "Cannot parse an empty sheet.";

  /** Creates the exception with its fixed message. */
  public EmptySheetException() {
    super(MESSAGE);
  }
}

View File

@ -7,6 +7,7 @@ import org.enso.table.data.column.builder.string.StringStorageBuilder;
import org.enso.table.data.column.storage.Storage;
import org.enso.table.data.table.Column;
import org.enso.table.data.table.Table;
import org.enso.table.error.EmptyFileException;
import org.enso.table.parsing.DatatypeParser;
import org.enso.table.parsing.TypeInferringParser;
import org.enso.table.parsing.problems.AdditionalInvalidRows;
@ -438,7 +439,12 @@ public class DelimitedReader {
/** Reads the input stream and returns a Table. */
public WithProblems<Table> read() {
ensureHeadersDetected();
initBuilders(getColumnCount());
int columnCount = getColumnCount();
if (columnCount == 0) {
throw new EmptyFileException();
}
initBuilders(columnCount);
while (canFitMoreRows()) {
var currentRow = readNextRow();
if (currentRow == null) break;

View File

@ -10,6 +10,7 @@ import org.enso.table.data.column.builder.object.InferredBuilder;
import org.enso.table.data.column.storage.ObjectStorage;
import org.enso.table.data.table.Column;
import org.enso.table.data.table.Table;
import org.enso.table.error.EmptySheetException;
import org.enso.table.error.InvalidLocationException;
import org.enso.table.excel.ExcelHeaders;
import org.enso.table.excel.ExcelRange;
@ -312,6 +313,10 @@ public class ExcelReader {
.mapToObj(idx -> new Column(excelHeaders.get(idx + startCol), builders.get(idx).seal()))
.toArray(Column[]::new);
if (columns.length == 0) {
throw new EmptySheetException();
}
return new WithProblems<>(new Table(columns), excelHeaders.getProblems());
}

View File

@ -1192,10 +1192,9 @@ spec setup =
table_builder [col1, col2]
Test.specify "should raise a warning when there are no output columns" <|
action = table.aggregate [] on_problems=_
problems = [No_Output_Columns]
tester = expect_column_names []
Problems.test_problem_handling action problems tester
[Problem_Behavior.Ignore, Problem_Behavior.Report_Warning, Problem_Behavior.Report_Error].each pb->
t = table.aggregate [] on_problems=pb
t.should_fail_with No_Output_Columns
Test.specify "should raise a warning when can't find a column by name" <|
action = table.aggregate [Group_By "Missing", Group_By "Index"] on_problems=_
@ -1209,10 +1208,10 @@ spec setup =
tester = expect_column_names ["Index"]
Problems.test_problem_handling action problems tester
Test.specify "should raise warnings when an invalid column index and no valid output" <|
action = table.aggregate [Group_By -3] on_problems=_
problems = [Column_Indexes_Out_Of_Range.Error [-3], No_Output_Columns]
tester = expect_column_names []
Test.specify "should raise warnings when grouped by an invalid column index" <|
action = table.aggregate [Group_By -3, Count] on_problems=_
problems = [Column_Indexes_Out_Of_Range.Error [-3]]
tester = expect_column_names ["Count"]
Problems.test_problem_handling action problems tester
Test.specify "should raise a warning when an invalid output name" <|
@ -1240,9 +1239,9 @@ spec setup =
Problems.test_problem_handling action problems tester
Test.specify "should ignore Count_Distinct if no columns matched" <|
action = table.aggregate [Count_Distinct (Column_Selector.By_Index [-100])] on_problems=_
problems = [Column_Indexes_Out_Of_Range.Error [-100], No_Output_Columns]
tester = expect_column_names []
action = table.aggregate [Count_Distinct (Column_Selector.By_Index [-100]), Count] on_problems=_
problems = [Column_Indexes_Out_Of_Range.Error [-100]]
tester = expect_column_names ["Count"]
Problems.test_problem_handling action problems tester
Test.group prefix+"Table.aggregate should raise warnings when there are issues computing aggregation" pending=(resolve_pending test_selection.aggregation_problems) <|

View File

@ -1,7 +1,7 @@
from Standard.Base import all
from Standard.Table import Column_Selector, Sort_Column, Sort_Column_Selector
from Standard.Table.Errors import Floating_Point_Grouping
from Standard.Table.Errors import all
from Standard.Test import Test, Problems
import Standard.Test.Extensions
@ -91,3 +91,21 @@ spec setup =
va.at 2 . should_equal "b"
[3, 5, 8].contains (vb.at 2) . should_be_true
Test.specify "should report missing input columns as errors regardless of on_problems" <|
t1 = table_builder [["X", [1, 2, 3]]]
[Problem_Behavior.Ignore, Problem_Behavior.Report_Warning, Problem_Behavior.Report_Error].each pb->
t2 = t1.distinct ["Y", "Z"] on_problems=pb
t2.should_fail_with Missing_Input_Columns
t2.catch . should_equal (Missing_Input_Columns.Error ["Y", "Z"])
t3 = t1.distinct (Column_Selector.By_Name ["X", "Y"]) on_problems=pb
t3.should_fail_with Missing_Input_Columns
t3.catch . should_equal (Missing_Input_Columns.Error ["Y"])
t4 = t1.distinct (Column_Selector.By_Index [0, 42]) on_problems=pb
t4.should_fail_with Column_Indexes_Out_Of_Range
t4.catch . should_equal (Column_Indexes_Out_Of_Range.Error [42])
t5 = t1.distinct [] on_problems=pb
t5.should_fail_with No_Input_Columns_Selected

View File

@ -52,17 +52,6 @@ spec setup =
t4.row_count . should_equal 0
t4.at "X" . to_vector . should_equal []
empty = table_builder [["X", [1, 2, 3]]] . select_columns []
empty.row_count . should_equal 0
empty.columns . should_equal []
t5 = empty.filter_blank_rows when_any=True
t5.row_count . should_equal 0
t5.columns . should_equal []
t6 = empty.filter_blank_rows when_any=False
t6.row_count . should_equal 0
t6.columns . should_equal []
Test.specify "should allow to remove blank columns" <|
r1 = t1.remove_columns (Column_Selector.Blank_Columns when_any=False)
r1.columns.map .name . should_equal ["a", "b", "c", "d", "e"]
@ -137,12 +126,9 @@ spec setup =
Test.specify "Blank_Columns selector should deal with edge cases" <|
t = table_builder [["X", [1, 2, 3, 4]]]
no_cols = t.select_columns []
no_rows = t.filter "X" (Filter_Condition.Equal to=0)
no_cols.columns . should_equal []
no_rows.row_count . should_equal 0
no_rows.at "X" . to_vector . should_equal []
no_cols.select_columns Column_Selector.Blank_Columns . columns . map .name . should_equal []
no_rows.select_columns Column_Selector.Blank_Columns . columns . map .name . should_equal ["X"]
no_rows.remove_columns Column_Selector.Blank_Columns . columns . map .name . should_equal []
no_rows.remove_columns Column_Selector.Blank_Columns . columns . map .name . should_fail_with No_Output_Columns

View File

@ -143,23 +143,17 @@ spec setup =
Problems.test_problem_handling action problems tester
Test.specify "should correctly handle problems: no columns in the output" <|
selector = By_Name []
action = table.select_columns selector on_problems=_
tester = expect_column_names []
problems = [No_Output_Columns]
Problems.test_problem_handling action problems tester
[Problem_Behavior.Ignore, Problem_Behavior.Report_Warning, Problem_Behavior.Report_Error].each pb->
t = table.select_columns (By_Index []) on_problems=pb
t.should_fail_with No_Output_Columns
Test.specify "should correctly handle multiple problems" <|
selector = By_Name ["hmmm"]
action = table.select_columns selector on_problems=_
tester = expect_column_names []
problems = [Missing_Input_Columns.Error ["hmmm"], No_Output_Columns]
Problems.test_problem_handling action problems tester
table.select_columns (By_Name ["hmmm"]) . should_fail_with No_Output_Columns
action_2 = table.select_columns (By_Index [0, -7, 0, 100]) on_problems=_
problems_2 = [Column_Indexes_Out_Of_Range.Error [100], Duplicate_Column_Selectors.Error [0], Input_Indices_Already_Matched.Error [-7]]
tester_2 = expect_column_names ["foo"]
Problems.test_problem_handling action_2 problems_2 tester_2
action = table.select_columns (By_Index [0, -7, 0, 100]) on_problems=_
problems = [Column_Indexes_Out_Of_Range.Error [100], Duplicate_Column_Selectors.Error [0], Input_Indices_Already_Matched.Error [-7]]
tester = expect_column_names ["foo"]
Problems.test_problem_handling action problems tester
Test.group prefix+"Table.remove_columns" <|
Test.specify "should work as shown in the doc examples" <|
@ -267,18 +261,15 @@ spec setup =
Problems.test_problem_handling action problems tester
# Spec: removing every column (case-sensitive regex ".*"). The `.each pb->` loop runs `remove_columns`
# under Ignore, Report_Warning and Report_Error and expects a `No_Output_Columns` failure each time.
# NOTE(review): +/- markers are missing; the first `selector`..`test_problem_handling` group looks
# like the superseded body that expected an empty result — confirm upstream.
Test.specify "should correctly handle problems: no columns in the output" <|
selector = By_Name [".*"] (Regex_Matcher.Value case_sensitivity=Case_Sensitivity.Sensitive)
action = table.remove_columns selector on_problems=_
tester = expect_column_names []
problems = [No_Output_Columns]
Problems.test_problem_handling action problems tester
[Problem_Behavior.Ignore, Problem_Behavior.Report_Warning, Problem_Behavior.Report_Error].each pb->
selector = By_Name [".*"] (Regex_Matcher.Value case_sensitivity=Case_Sensitivity.Sensitive)
t = table.remove_columns selector on_problems=pb
t.should_fail_with No_Output_Columns
Test.specify "should correctly handle multiple problems" <|
selector = By_Name [".*", "hmmm"] (Regex_Matcher.Value case_sensitivity=Case_Sensitivity.Sensitive)
action = table.remove_columns selector on_problems=_
tester = expect_column_names []
problems = [Missing_Input_Columns.Error ["hmmm"], No_Output_Columns]
Problems.test_problem_handling action problems tester
t1 = table.remove_columns selector
t1.should_fail_with No_Output_Columns
action_2 = table.remove_columns (By_Index [0, -7, 0, 100]) on_problems=_
problems_2 = [Column_Indexes_Out_Of_Range.Error [100], Duplicate_Column_Selectors.Error [0], Input_Indices_Already_Matched.Error [-7]]

View File

@ -61,13 +61,6 @@ spec =
t2 = t1.limit 5
t2.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "A", "T1"."B" AS "B", "T1"."C" AS "C" FROM "T1" AS "T1" LIMIT 5', []]
# Spec: builds a column-less table via `select_columns (Column_Selector.By_Name [])`, then checks its
# JSON form (null query plus an explanatory message), a column count of 0, and that `to_sql` fails
# with `Unsupported_Database_Operation.Error`.
# NOTE(review): the hunk header shrinks this region from 13 to 6 lines, so this spec is presumably
# deleted by the commit (a table without columns is no longer constructible) — confirm.
Test.specify "should work correctly when there are no columns" <|
empty = t1.select_columns (Column_Selector.By_Name [])
json = JS_Object.from_pairs [["query", Nothing], ["message", "The table has no columns so a query cannot be generated."]] . to_text
empty.to_json . should_equal json
empty.column_count . should_equal 0
empty.to_sql . should_fail_with Unsupported_Database_Operation.Error
Test.group "[Codegen] Building Expressions" <|
Test.specify "should allow building expressions from columns and constants" <|
a = t1.at "A"

View File

@ -87,10 +87,9 @@ spec =
t2.row_count . should_equal 0
t2.at "x" . to_vector . should_equal []
# Two variants of the empty-file read spec, both reading data/empty.txt with a comma `Delimited` format.
# The first expects a table with no column names and zero rows; the second expects the read to fail
# with `Empty_File_Error`.
# NOTE(review): +/- markers are missing; given the commit's intent the first variant is presumably
# the removed one and the second its replacement — confirm.
Test.specify "should be able to load even an empty file" <|
table = Data.read (enso_project.data / "empty.txt") (Delimited "," headers=True value_formatter=Nothing)
table.columns.map .name . should_equal []
table.row_count . should_equal 0
Test.specify "should raise an informative error when loading an empty file" <|
t = Data.read (enso_project.data / "empty.txt") (Delimited "," headers=True value_formatter=Nothing)
t.should_fail_with Empty_File_Error
Test.specify "should correctly handle file opening issues" <|
nonexistent_file = enso_project.data / "a_filename_that_does_not_exist.foobar"

View File

@ -50,15 +50,6 @@ spec =
text.should_equal (lines.join separator suffix=separator)
file.delete
# Spec: writes `Table.new []` to data/transient/empty.csv (deleting any previous file first),
# expects the write to succeed and the file text to be empty, then deletes the file.
# NOTE(review): the hunk header shrinks this region from 15 to 6 lines, so this spec is presumably
# deleted by the commit — confirm.
Test.specify "should be able to write an empty table" <|
table = Table.new []
file = (enso_project.data / "transient" / "empty.csv")
file.delete_if_exists
table.write file on_problems=Report_Error . should_succeed
text = Data.read_text file
text.should_equal ''
file.delete
Test.specify 'should quote values that contain the delimiter or quotes, in the [,""] variant' <|
data_formatter = Data_Formatter.Value decimal_point=","
table = Table.new [['The Column "Name"', ["foo","'bar'",'"baz"', 'one, two, three']], ["Hello, Column?", [1.0, 1000000.5, 2.2, -1.5]]]

View File

@ -4,7 +4,7 @@ import Standard.Base.Error.Illegal_Argument.Illegal_Argument
from Standard.Table import Table, Match_Columns, Column_Name_Mapping, Excel, Excel_Range, Data_Formatter, Sheet_Names, Range_Names, Worksheet, Cell_Range, Delimited, Column_Selector
from Standard.Table.Errors import Invalid_Output_Column_Names, Duplicate_Output_Column_Names, Invalid_Location, Range_Exceeded, Existing_Data, Column_Count_Mismatch, Column_Name_Mismatch
from Standard.Table.Errors import Invalid_Output_Column_Names, Duplicate_Output_Column_Names, Invalid_Location, Range_Exceeded, Existing_Data, Column_Count_Mismatch, Column_Name_Mismatch, Empty_Sheet_Error
from Standard.Test import Test, Test_Suite, Problems
import Standard.Test.Extensions
@ -36,9 +36,9 @@ spec_fmt header file read_method =
t.columns.map .name . should_equal ['Student Name', 'Enrolment Date']
t.at 'Enrolment Date' . map .day . to_vector . should_equal [2, 26, 4, 24, 31, 7]
# Spec: reads the worksheet named "Empty" through `read_method`.
# NOTE(review): +/- markers are missing, so two spec headers and two assertions are interleaved.
# "should read an empty table" with `column_count.should_equal 0` looks like the old version;
# the "informative error" header with `should_fail_with Empty_Sheet_Error` looks like the new one — confirm.
Test.specify "should read an empty table" <|
Test.specify "should give an informative error when reading an empty table" <|
t = read_method file (Excel (Worksheet "Empty"))
t.column_count.should_equal 0
t.should_fail_with Empty_Sheet_Error
Test.specify "should gracefully handle duplicate column names and formulas" <|
t = read_method file (Excel (Worksheet "Duplicate Columns"))

View File

@ -77,13 +77,12 @@ spec =
# Specs: invalid `Table.new` / `Table.from_rows` inputs must fail rather than build a table.
# Visible cases: mismatched column lengths, duplicate column names, no columns at all (all
# `Illegal_Argument.Error`), and a short row (`Index_Out_Of_Bounds.Error`).
# NOTE(review): +/- markers are missing. `Table.new []` appears twice and the second header is marked
# `pending` ("merge with above once enabled"), so the pending spec was presumably folded into the
# first one by this commit; which of the `Table.from_rows []` lines survive is not visible — confirm.
Test.specify "should handle error scenarios gracefully" <|
Table.new [["X", [1,2,3]], ["Y", [4]]] . should_fail_with Illegal_Argument.Error
Table.new [["X", [1]], ["X", [2]]] . should_fail_with Illegal_Argument.Error
Table.new [] . should_fail_with Illegal_Argument.Error
Table.from_rows ["X", "X"] [] . should_fail_with Illegal_Argument.Error
Table.from_rows ["X", "Y"] [[1,2], [1]] . should_fail_with Index_Out_Of_Bounds.Error
Test.specify "should handle error scenarios gracefully (merge with above once enabled)" pending="To be enabled after errors refactor where instead of returning a table with no columns we throw an error." <|
Table.new [] . should_fail_with Illegal_Argument.Error
Table.from_rows [] [] . should_fail_with Illegal_Argument.Error
Table.from_rows [] [[]] . should_fail_with Illegal_Argument.Error
Test.specify "should correctly infer storage types" <|
varied_type_table.at "strs" . storage_type . should_equal Storage.Text
@ -729,18 +728,6 @@ spec =
problems2 = [Floating_Point_Grouping.Error "X"]
Problems.test_problem_handling action2 problems2 tester2
# Spec: `distinct` with a selector that picks no columns. Both an empty name list and an unknown
# name ("mismatched") are expected to return the table unchanged while reporting
# `No_Input_Columns_Selected` (plus `Missing_Input_Columns` in the second case).
# NOTE(review): the hunk header shrinks this region from 18 to 6 lines, so this spec is presumably
# deleted by the commit — confirm.
Test.specify "should report a warning and report the whole table if no columns were selected" <|
t = Table.new [["A", [1, 2, 1, 1]]]
test table = table.should_equal t
action1 = t.distinct (Column_Selector.By_Name []) on_problems=_
problems1 = [No_Input_Columns_Selected]
Problems.test_problem_handling action1 problems1 test
action2 = t.distinct (Column_Selector.By_Name ["mismatched"]) on_problems=_
problems2 = [Missing_Input_Columns.Error ["mismatched"], No_Input_Columns_Selected]
Problems.test_problem_handling action2 problems2 test
Test.specify "until hashing is supported, should throw an error when trying to aggregate a custom object" <|
t = Table.new [["X", [My.Data 1 2, My.Data 3 4, My.Data 1 2]]]
t.distinct . should_fail_with Illegal_Argument.Error

View File

@ -14,10 +14,6 @@ spec =
Json.parse result . should_equal <| Json.parse expected_json_text
Test.group "Geo_Map" <|
# Spec: passes a table built from no headers and no rows to the `expect` helper with '{}'.
# NOTE(review): `expect` is defined outside this hunk; the hunk header drops 4 lines here, so this
# spec is presumably deleted by the commit — confirm.
Test.specify "works with empty table" <|
table = Table.from_rows [] []
expect table '{}'
Test.specify "skips unrecognized columns" <|
header = ['α' , 'β' , 'ω']
row_1 = [11 , 10 , 09 ]

View File

@ -23,11 +23,6 @@ spec =
json.should_equal expected_json
Test.group "Histogram Visualization" <|
# Spec: passes a table built from no headers and no rows to the `expect` helper with `Nothing` and [].
# NOTE(review): `expect` is defined outside this hunk; the hunk header drops 5 lines here, so this
# spec is presumably deleted by the commit — confirm.
Test.specify "deals with an empty table" <|
table = Table.from_rows [] []
expect table Nothing []
Test.specify "plots first column if none recognized" <|
header = ['α', 'ω']
row_1 = [11 , 10 ]

View File

@ -29,11 +29,6 @@ spec =
no_labels = 'null'
Test.group "Scatter Plot Visualization" <|
# Spec: passes a table built from no headers and no rows to the `expect` helper with 'null' and '[]'.
# NOTE(review): `expect` is defined outside this hunk; the hunk header drops 5 lines here, so this
# spec is presumably deleted by the commit — confirm.
Test.specify "deals with an empty table" <|
table = Table.from_rows [] []
expect table 'null' '[]'
Test.specify "plots first column if none recognized" <|
header = ['α', 'ω']
row_1 = [11 , 10 ]