Adding write support to File_Format.Excel (#3551)

Support for writing tables to Excel.

# Important Notes
The `Error` mode has custom handling: in this mode a new table can still be appended to an existing file.
This commit is contained in:
James Dunkerley 2022-07-04 19:32:16 +01:00 committed by GitHub
parent 2b2563a395
commit 4ca2097488
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
32 changed files with 763 additions and 567 deletions

View File

@ -144,6 +144,7 @@
- [Removed obsolete `from_xls` and `from_xlsx` functions. Added support for
reading column names from first row in `File_Format.Excel`][3523]
- [Added `File_Format.Delimited` support to `Table.write` for new files.][3528]
- [Added `File_Format.Excel` support to `Table.write` for new files.][3551]
[debug-shortcuts]:
https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug
@ -227,6 +228,7 @@
[3519]: https://github.com/enso-org/enso/pull/3519
[3523]: https://github.com/enso-org/enso/pull/3523
[3528]: https://github.com/enso-org/enso/pull/3528
[3551]: https://github.com/enso-org/enso/pull/3551
#### Enso Compiler

View File

@ -524,9 +524,30 @@ type File
Arguments:
- destination: the destination to copy the file to.
- replace_existing: specifies if the operation should proceed if the
destination file already exists. Defaults to `True`.
destination file already exists. Defaults to `False`.
copy_to : File -> Boolean -> Nothing ! File_Error
copy_to destination replace_existing=False =
    # Pick the options array first: a single REPLACE_EXISTING option when
    # overwriting is allowed, otherwise no options at all.
    copy_options = case replace_existing of
        True -> Array.new_1 StandardCopyOption.REPLACE_EXISTING
        False -> Array.empty
    # The builtin call runs under `handle_java_exceptions`.
    here.handle_java_exceptions self <|
        self.copy_builtin destination copy_options
## PRIVATE

   Builtin method that copies this file to a new destination.
   Recommended to use `File.copy_to` instead which handles potential exceptions.

   Arguments:
   - destination: the file to copy this file to.
   - copy_options: an array of copy options handed to the builtin. `copy_to`
     passes either an empty array or a single
     `StandardCopyOption.REPLACE_EXISTING`.
copy_builtin : File -> Array Any -> Nothing
copy_builtin destination copy_options = @Builtin_Method "File.copy_builtin"
## Moves the file to the specified destination.
Arguments:
- destination: the destination to move the file to.
- replace_existing: specifies if the operation should proceed if the
destination file already exists. Defaults to `False`.
move_to : File -> Boolean -> Nothing ! File_Error
move_to destination replace_existing=True =
move_to destination replace_existing=False =
here.handle_java_exceptions self <| case replace_existing of
True ->
copy_options = Array.new_1 StandardCopyOption.REPLACE_EXISTING

View File

@ -96,7 +96,7 @@ write_file_backing_up_old_one file action = Panic.recover [Io_Error, File_Not_Fo
to back-up anymore, but this is not a failure, so it can be
safely ignored.
Panic.catch File_Not_Found handler=(_->Nothing) <|
Panic.rethrow <| file.move_to bak_file
Panic.rethrow <| file.move_to bak_file replace_existing=True
Panic.rethrow <| new_file.move_to file
go 0

View File

@ -21,7 +21,7 @@
example_to_xlsx =
path = Enso_Project.data / example_xlsx_output.xlsx
Examples.inventory_table.write_xlsx path
Examples.inventory_table.write path
> Example
Join multiple tables together. It joins tables on their indices, so we need

View File

@ -30,7 +30,7 @@
example_to_xlsx =
path = Enso_Project.data / example_xlsx_output.xlsx
Examples.inventory_table.write_xlsx path
Examples.inventory_table.write path
> Example
Write a table to a CSV file.

View File

@ -35,4 +35,4 @@ component-groups:
- Standard.Table.Data.Column.Column.to_table
- Standard.Base.Output:
exports:
- Standard.Table.Data.Table.Table.write_xlsx
- Standard.Table.Data.Table.Table.write

View File

@ -4,7 +4,6 @@ import Standard.Base.System.Platform
import Standard.Table.Data.Column
import Standard.Visualization
from Standard.Base.Data.Time.Date as Date_Module import Date
import Standard.Table.Io.Spreadsheet_Write_Mode
import Standard.Table.Io.File_Format
import Standard.Base.System.File
import Standard.Base.System.File.Existing_File_Behavior
@ -33,7 +32,6 @@ import Standard.Base.Data.Ordering.Comparator
polyglot java import org.enso.table.data.table.Table as Java_Table
polyglot java import org.enso.table.data.table.Column as Java_Column
polyglot java import org.enso.table.format.xlsx.Writer as Spreadsheet_Writer
polyglot java import org.enso.table.operations.OrderBuilder
## Creates a new table from a vector of `[name, items]` pairs.
@ -1030,50 +1028,6 @@ type Table
mask = OrderBuilder.buildReversedMask self.row_count
Table <| self.java_table.applyMask mask
## ALIAS Write Excel File
UNSTABLE
Writes this table into an XLSX spreadsheet.
Arguments:
- file: the XLSX file to write data to. If it exists, the behavior is
specified by the `write_mode` argument. Note that other files may be
created or written to if `max_rows_per_file` is used.
- sheet: the name of the sheet to use for writing the data.
- write_mode: specifies this method's behavior if the specified file and
sheet already exist. Can be one of:
- Spreadsheet_Write_Mode.Create: this is the default value. This
setting will create a new sheet in the file, with a name chosen such
that the clash is avoided.
- Spreadsheet_Write_Mode.Overwrite: will result in removing all
contents of the existing sheet and replacing it with the new data.
- Spreadsheet_Write_Mode.Append: will append this data to the existing
sheet, such that the new data starts after the last row containing
any data.
- include_header: Specifies whether the first line of generated CSV
should contain the column names.
- max_rows_per_file: specifies the maximum number of rows that can be
written to a single file. If this option is set and its value is less
than the number of rows in this table, the behavior of the `file`
argument changes. Instead of writing the contents directly to `file`,
its name is parsed and a numbered series of files with names based
on `file` is written to instead. For example, if `file` points to
`~/my_data/output.xlsx`, `self` contains 250 rows, and
`max_rows_per_file` is set to `100`, 3 different files will be written:
- `~/my_data/output_1.xlsx`, containing rows 0 through 99;
- `~/my_data/output_2.xlsx`, containing rows 100 through 199;
- `~/my_data/output_3.xlsx`, containing rows 200 through 249.
> Example
Write a table to an XLSX file.
import Standard.Examples
example_to_xlsx = Examples.inventory_table.write_xlsx (Enso_Project.data / example_xlsx_output.xlsx)
write_xlsx : File.File -> String -> Spreadsheet_Write_Mode.Speadsheet_Write_Mode -> Boolean -> Nothing | Integer -> Nothing
write_xlsx file sheet='Data' write_mode=Spreadsheet_Write_Mode.Create include_header=True max_rows_per_file=Nothing =
Spreadsheet_Writer.writeXlsx self.java_table file.absolute.path sheet write_mode.to_java include_header max_rows_per_file .write_to_spreadsheet
## ALIAS Write JSON
UNSTABLE
@ -1162,35 +1116,6 @@ type Table
to_csv : Text
to_csv = Text.from self (File_Format.Delimited delimiter=",")
## UNSTABLE
ADVANCED
Used to write a value into a spreadsheet cell.
Arguments:
- cell: an instance of `org.apache.poi.ss.usermodel.Cell`, the value of
which should be set by this method.
Any.write_to_spreadsheet cell = cell.setCellValue self.to_text
## UNSTABLE
ADVANCED
Used to write a value into a spreadsheet cell.
Arguments:
- cell: an instance of `org.apache.poi.ss.usermodel.Cell`, the value of
which should be set by this method.
Text.write_to_spreadsheet cell = cell.setCellValue self
## UNSTABLE
ADVANCED
Used to write a value into a spreadsheet cell.
Arguments:
- cell: an instance of `org.apache.poi.ss.usermodel.Cell`, the value of
which should be set by this method.
Date.write_to_spreadsheet cell = cell.setCellValue self.internal_local_date
## UNSTABLE

View File

@ -174,3 +174,16 @@ type Unsupported_File_Type filename
Unsupported_File_Type.to_display_text : Text
Unsupported_File_Type.to_display_text =
"The "+self.filename+" has a type that is not supported by the Auto format."
## Indicates that the target range contains existing data and the user did not
   specify to overwrite.

   Arguments:
   - message: text describing the problem; returned unchanged by
     `to_display_text`.
type Existing_Data message

## Human-readable form of the error: the stored `message`.
Existing_Data.to_display_text : Text
Existing_Data.to_display_text = self.message
## Indicates that the specified range is not large enough to fit the data.

   Arguments:
   - message: text describing the problem; returned unchanged by
     `to_display_text`.
type Range_Exceeded message

## Human-readable form of the error: the stored `message`.
Range_Exceeded.to_display_text : Text
Range_Exceeded.to_display_text = self.message

View File

@ -1,14 +1,23 @@
from Standard.Base import all
from Standard.Base.Error.Problem_Behavior as Problem_Behavior_Module import Problem_Behavior
import Standard.Base.System.File.Existing_File_Behavior
import Standard.Base.System.File.Option
from Standard.Table.Io.File_Format import Infer
import Standard.Table.Data.Table
from Standard.Table.Error as Error_Module import Invalid_Location, Duplicate_Output_Column_Names, Invalid_Output_Column_Names
from Standard.Table.Error as Error_Module import Invalid_Location, Duplicate_Output_Column_Names, Invalid_Output_Column_Names, Range_Exceeded, Existing_Data
import Standard.Base.Error.Common as Errors
polyglot java import org.enso.table.excel.ExcelRange as Java_Range
polyglot java import org.enso.table.excel.ExcelHeaders
polyglot java import org.enso.table.read.ExcelReader
polyglot java import org.enso.table.write.ExcelWriter
polyglot java import org.enso.table.error.ExistingDataException
polyglot java import org.enso.table.error.RangeExceededException
polyglot java import org.enso.table.error.InvalidLocationException
polyglot java import java.lang.IllegalArgumentException
polyglot java import java.lang.IllegalStateException
polyglot java import java.io.IOException
polyglot java import org.apache.poi.UnsupportedFileFormatException
polyglot java import org.enso.table.util.problems.DuplicateNames
@ -103,8 +112,9 @@ type Excel_Range
## Creates a Range from an address.
from_address : Text -> Excel_Range
from_address address =
Panic.catch IllegalArgumentException (Excel_Range (Java_Range.new address)) caught_panic->
Error.throw (Illegal_Argument_Error caught_panic.payload.cause.getMessage caught_panic.payload.cause)
illegal_argument caught_panic = Error.throw (Illegal_Argument_Error caught_panic.payload.cause.getMessage caught_panic.payload.cause)
Panic.catch IllegalArgumentException handler=illegal_argument <|
Excel_Range (Java_Range.new address)
## Create a Range for a single cell.
for_cell : Text -> (Text|Integer) -> Integer -> Excel_Range
@ -185,16 +195,31 @@ read_excel file section headers on_problems xls_format=False =
Excel_Range _ -> ExcelReader.readRange stream address.java_range (here.make_java_headers headers) skip_rows row_limit xls_format
Text -> ExcelReader.readRangeByName stream address (here.make_java_headers headers) skip_rows row_limit xls_format
bad_argument caught_panic = Error.throw (Invalid_Location caught_panic.payload.cause.getCause)
handle_bad_argument = Panic.catch IllegalArgumentException handler=bad_argument
here.read_excel_file file reader
bad_format caught_panic = Error.throw (File.Io_Error file caught_panic.payload.cause.getMessage)
handle_bad_format = Panic.catch UnsupportedFileFormatException handler=bad_format
## PRIVATE
   Writes a table into an Excel workbook and saves the workbook to `file`.

   Arguments:
   - file: the target file. If it does not exist a new workbook is created,
     otherwise the existing workbook is loaded and modified.
   - table: the table whose `java_table` is written.
   - on_existing_file: `Append` is not implemented and raises an
     unimplemented error. `Overwrite` and `Backup` allow existing data in the
     target to be replaced. The same value is used to write the workbook out.
   - section: a `Sheet` or `Cell_Range` selecting where the table is written.
   - headers: `True`, `False` or `Infer`; converted with `make_java_headers`.
   - the sixth argument (the problem behavior) is currently ignored.
   - xls_format: passed to the workbook factory/reader to select the format.

   If the writer reports an error, that error is returned and nothing is
   written to disk.
write_excel : File -> Table -> Existing_File_Behavior -> (Sheet | Cell_Range) -> (Boolean|Infer) -> Problem_Behavior -> Boolean
write_excel file table on_existing_file section headers _ xls_format=False =
    if on_existing_file == Existing_File_Behavior.Append then Errors.unimplemented "Appending to an existing File_Format.Excel file is not implemented yet." else
        workbook = if file.exists.not then ExcelWriter.createWorkbook xls_format else
            here.read_excel_file file stream->(ExcelReader.getWorkbook stream xls_format)

        replace = (on_existing_file == Existing_File_Behavior.Overwrite) || (on_existing_file == Existing_File_Behavior.Backup)
        java_headers = here.make_java_headers headers
        # NOTE(review): the callback is installed by calling
        # `getEnsoToTextCallback` with an argument - confirm that the Java
        # `ExcelWriter` really exposes the setter under this name.
        if ExcelWriter.getEnsoToTextCallback == Nothing then ExcelWriter.getEnsoToTextCallback (.to_text)
        result = here.handle_writer <| case section of
            Sheet sheet skip_rows row_limit ->
                ExcelWriter.writeTableToSheet workbook sheet replace skip_rows table.java_table row_limit java_headers
            Cell_Range address skip_rows row_limit -> case address of
                Excel_Range _ -> ExcelWriter.writeTableToRange workbook address.java_range replace skip_rows table.java_table row_limit java_headers
                Text -> ExcelWriter.writeTableToRange workbook address replace skip_rows table.java_table row_limit java_headers

        # Only touch the file system once the in-memory write has succeeded.
        if result.is_error then result else
            write_stream stream = stream.with_java_stream java_stream->
                workbook.write java_stream
            on_existing_file.write file write_stream
## PRIVATE
prepare_reader_table : Problem_Behavior -> Any -> Table
prepare_reader_table on_problems result_with_problems =
map_problem java_problem =
if Java.is_instance java_problem DuplicateNames then Duplicate_Output_Column_Names (Vector.Vector java_problem.duplicatedNames) else
@ -204,9 +229,37 @@ prepare_reader_table on_problems result_with_problems =
on_problems.attach_problems_after (Table.Table result_with_problems.value) parsing_problems
## PRIVATE
Convert True|False|Infer to the correct HeaderBehavior
make_java_headers : (True|False|Infer) -> ExcelReader.HeaderBehavior
Convert Boolean|Infer to the correct HeaderBehavior
make_java_headers : (Boolean|Infer) -> ExcelHeaders.HeaderBehavior
make_java_headers headers = case headers of
True -> ExcelReader.HeaderBehavior.USE_FIRST_ROW_AS_HEADERS
Infer -> ExcelReader.HeaderBehavior.INFER
False -> ExcelReader.HeaderBehavior.EXCEL_COLUMN_NAMES
True -> ExcelHeaders.HeaderBehavior.USE_FIRST_ROW_AS_HEADERS
Infer -> ExcelHeaders.HeaderBehavior.INFER
False -> ExcelHeaders.HeaderBehavior.EXCEL_COLUMN_NAMES
## PRIVATE
   Opens `file` for reading and runs `reader` on its Java input stream.

   An `UnsupportedFileFormatException` is turned into a `File.Io_Error` for
   `file`, an `InvalidLocationException` into an `Invalid_Location` error, and
   the whole action runs under `File.handle_java_exceptions`.
read_excel_file file reader =
    to_invalid_location caught = Error.throw (Invalid_Location caught.payload.cause.getCause)
    to_io_error caught = Error.throw (File.Io_Error file caught.payload.cause.getMessage)
    catch_invalid_location = Panic.catch InvalidLocationException handler=to_invalid_location
    catch_unsupported_format = Panic.catch UnsupportedFileFormatException handler=to_io_error
    run_reader stream = stream.with_java_stream reader
    File.handle_java_exceptions file <| catch_invalid_location <| catch_unsupported_format <|
        file.with_input_stream [File.Option.Read] run_reader
## PRIVATE
   Runs the suspended `writer` action and translates the Java exceptions it
   may raise:
   - `ExistingDataException` becomes an `Existing_Data` error,
   - `RangeExceededException` becomes a `Range_Exceeded` error,
   - `InvalidLocationException` becomes an `Invalid_Location` error,
   - `IllegalStateException` is re-thrown as an `Illegal_State_Error` panic.
handle_writer ~writer =
    to_existing_data caught = Error.throw (Existing_Data caught.payload.cause.getMessage)
    to_range_exceeded caught = Error.throw (Range_Exceeded caught.payload.cause.getMessage)
    to_invalid_location caught = Error.throw (Invalid_Location caught.payload.cause.getCause)
    to_illegal_state caught = Panic.throw (Illegal_State_Error caught.payload.cause.getMessage)

    catch_existing_data = Panic.catch ExistingDataException handler=to_existing_data
    catch_range_exceeded = Panic.catch RangeExceededException handler=to_range_exceeded
    catch_invalid_location = Panic.catch InvalidLocationException handler=to_invalid_location
    catch_illegal_state = Panic.catch IllegalStateException handler=to_illegal_state

    catch_illegal_state <| catch_invalid_location <| catch_range_exceeded <| catch_existing_data <| writer

View File

@ -186,13 +186,22 @@ type Excel
## Implements the `File.read` for this `File_Format`
read : File -> Problem_Behavior -> Any
read file on_problems =
format = if self.xls_format != Infer then self.xls_format else
extension = file.extension
(extension.equals_ignore_case ".xls") || (extension.equals_ignore_case ".xlt")
format = Excel.is_xls_format self.xls_format file
Excel_Module.read_excel file self.section self.headers on_problems format
## Implements the `Table.write` for this `File_Format`.
write_table : File -> Table -> Existing_File_Behavior -> Column_Mapping -> Problem_Behavior -> Nothing
write_table _ _ _ _ _ =
Errors.unimplemented "`Table.write` for the `Excel` format is not implemented yet."
write_table file table on_existing_file _ on_problems =
format = Excel.is_xls_format self.xls_format file
case self.section of
Excel_Module.Sheet_Names -> Error.throw (Illegal_Argument_Error "Sheet_Names cannot be used for `write`.")
Excel_Module.Range_Names -> Error.throw (Illegal_Argument_Error "Range_Names cannot be used for `write`.")
_ -> Excel_Module.write_excel file table on_existing_file self.section self.headers on_problems format
## PRIVATE
   Resolve the xls_format setting to a boolean.

   Any value other than `Infer` is returned unchanged. For `Infer` the result
   is `True` when the file extension is `.xls` or `.xlt`, ignoring case.
is_xls_format : (Boolean|Infer) -> File -> Boolean
is_xls_format xls_format file = case xls_format of
    Infer ->
        extension = file.extension
        (extension.equals_ignore_case ".xls") || (extension.equals_ignore_case ".xlt")
    _ -> xls_format

View File

@ -1,24 +0,0 @@
from Standard.Base import all
polyglot java import org.enso.table.format.xlsx.Writer
## Specifies the behavior of XLSX writing for pre-existing sheets.
type Spreadsheet_Write_Mode
## Append new data to the existing sheet, such that the new data starts
after the last row containing any data.
type Append
## Create a new sheet, renaming it such that there is no clash with
exisitng sheets.
type Create
## Remove all contents from the existing sheet and write the data to it.
type Overwrite
## PRIVATE
Converts this into a Java-side representation.
to_java = case self of
Append -> Writer.WriteMode.APPEND
Create -> Writer.WriteMode.CREATE_SHEET
Overwrite -> Writer.WriteMode.OVERWRITE_SHEET

View File

@ -2,18 +2,18 @@ from Standard.Base import all
import Standard.Geo.Geo_Json
import Standard.Table.Io.File_Read
import Standard.Table.Io.File_Format
import Standard.Table.Io.Excel
import Standard.Table.Io.Spreadsheet_Write_Mode
import Standard.Table.Data.Table
import Standard.Table.Data.Column
import Standard.Table.Model
from Standard.Table.Io.Excel export Excel_Section, Excel_Range
export Standard.Table.Io.Spreadsheet_Write_Mode
export Standard.Table.Data.Column
export Standard.Table.Model
export Standard.Table.Io.File_Read
export Standard.Table.Io.File_Format
from Standard.Table.Data.Table export new, from_rows, join, concat, No_Such_Column_Error, Table

View File

@ -138,6 +138,12 @@ public class EnsoFile implements TruffleObject {
truffleFile.delete();
}
/**
 * Copies this file to the given target. Registered as the {@code copy_builtin} builtin method.
 *
 * @param target the file to copy to; its underlying {@code truffleFile} is the copy destination
 * @param options copy options passed unchanged to {@code truffleFile.copy}
 * @throws IOException raised by the underlying copy; the {@code WrapException} annotation
 *     declares a mapping of this exception to {@code PolyglotError}
 */
@Builtin.Method(name = "copy_builtin", description = "Copy this file to a target destination")
@Builtin.WrapException(from = IOException.class, to = PolyglotError.class, propagate = true)
public void copy(EnsoFile target, CopyOption[] options) throws IOException {
  truffleFile.copy(target.truffleFile, options);
}
@Builtin.Method(name = "move_builtin", description = "Move this file to a target destination")
@Builtin.WrapException(from = IOException.class, to = PolyglotError.class, propagate = true)
public void move(EnsoFile target, CopyOption[] options) throws IOException {

View File

@ -2,10 +2,7 @@ package org.enso.table.data.column.storage;
import java.util.BitSet;
import java.util.Comparator;
import java.util.function.BiConsumer;
import java.util.function.Function;
import org.apache.poi.ss.usermodel.Cell;
import org.enso.table.data.column.operation.map.MapOpStorage;
import org.enso.table.data.column.operation.map.MapOperation;
import org.enso.table.data.column.operation.map.UnaryMapOperation;
@ -320,9 +317,4 @@ public class BoolStorage extends Storage {
newSize,
negated);
}
@Override
public void writeSpreadsheetCell(int index, Cell cell, BiConsumer<Object, Cell> writeCell) {
cell.setCellValue(getItem(index));
}
}

View File

@ -2,10 +2,7 @@ package org.enso.table.data.column.storage;
import java.util.BitSet;
import java.util.Comparator;
import java.util.function.BiConsumer;
import java.util.function.Function;
import org.apache.poi.ss.usermodel.Cell;
import org.enso.table.data.column.builder.object.NumericBuilder;
import org.enso.table.data.column.operation.map.MapOpStorage;
import org.enso.table.data.column.operation.map.UnaryMapOperation;
@ -268,9 +265,4 @@ public class DoubleStorage extends NumericStorage {
BitSet newMask = isMissing.get(offset, offset + limit);
return new DoubleStorage(newData, newSize, newMask);
}
@Override
public void writeSpreadsheetCell(int index, Cell cell, BiConsumer<Object, Cell> writeCell) {
cell.setCellValue(getItem(index));
}
}

View File

@ -1,11 +1,8 @@
package org.enso.table.data.column.storage;
import java.util.*;
import java.util.function.BiConsumer;
import java.util.function.Function;
import java.util.stream.LongStream;
import org.apache.poi.ss.usermodel.Cell;
import org.enso.table.data.column.builder.object.NumericBuilder;
import org.enso.table.data.column.operation.aggregate.Aggregator;
import org.enso.table.data.column.operation.aggregate.numeric.LongToLongAggregator;
@ -98,9 +95,8 @@ public class LongStorage extends NumericStorage {
@Override
protected Aggregator getVectorizedAggregator(String name, int resultSize) {
switch (name) {
case Aggregators.SUM:
return new LongToLongAggregator(this, resultSize) {
return switch (name) {
case Aggregators.SUM -> new LongToLongAggregator(this, resultSize) {
@Override
protected void runGroup(LongStream items) {
long[] elements = items.toArray();
@ -111,8 +107,7 @@ public class LongStorage extends NumericStorage {
}
}
};
case Aggregators.MAX:
return new LongToLongAggregator(this, resultSize) {
case Aggregators.MAX -> new LongToLongAggregator(this, resultSize) {
@Override
protected void runGroup(LongStream items) {
OptionalLong r = items.max();
@ -123,8 +118,7 @@ public class LongStorage extends NumericStorage {
}
}
};
case Aggregators.MIN:
return new LongToLongAggregator(this, resultSize) {
case Aggregators.MIN -> new LongToLongAggregator(this, resultSize) {
@Override
protected void runGroup(LongStream items) {
OptionalLong r = items.min();
@ -135,9 +129,8 @@ public class LongStorage extends NumericStorage {
}
}
};
default:
return super.getVectorizedAggregator(name, resultSize);
}
default -> super.getVectorizedAggregator(name, resultSize);
};
}
private Storage fillMissingDouble(double arg) {
@ -382,9 +375,4 @@ public class LongStorage extends NumericStorage {
BitSet newMask = isMissing.get(offset, offset + limit);
return new LongStorage(newData, newSize, newMask);
}
@Override
public void writeSpreadsheetCell(int index, Cell cell, BiConsumer<Object, Cell> writeCell) {
cell.setCellValue(getItem(index));
}
}

View File

@ -1,17 +1,13 @@
package org.enso.table.data.column.storage;
import java.util.Arrays;
import java.util.BitSet;
import java.util.Comparator;
import java.util.function.BiConsumer;
import java.util.function.Function;
import org.apache.poi.ss.usermodel.Cell;
import org.enso.table.data.column.operation.map.MapOpStorage;
import org.enso.table.data.column.operation.map.UnaryMapOperation;
import org.enso.table.data.index.Index;
import org.enso.table.data.mask.OrderMask;
import java.util.BitSet;
import java.util.Comparator;
/** A column storing arbitrary objects. */
public class ObjectStorage extends Storage {
private final Object[] data;
@ -157,9 +153,4 @@ public class ObjectStorage extends Storage {
System.arraycopy(data, offset, newData, 0, newSize);
return new ObjectStorage(newData, newSize);
}
@Override
public void writeSpreadsheetCell(int index, Cell cell, BiConsumer<Object, Cell> writeCell) {
writeCell.accept(getItem(index), cell);
}
}

View File

@ -1,6 +1,5 @@
package org.enso.table.data.column.storage;
import org.apache.poi.ss.usermodel.Cell;
import org.enso.table.data.column.builder.object.Builder;
import org.enso.table.data.column.builder.object.InferredBuilder;
import org.enso.table.data.column.operation.aggregate.Aggregator;
@ -8,7 +7,6 @@ import org.enso.table.data.column.operation.aggregate.CountAggregator;
import org.enso.table.data.column.operation.aggregate.FunctionAggregator;
import java.util.*;
import java.util.function.BiConsumer;
import java.util.function.BiFunction;
import java.util.function.Function;
@ -297,14 +295,4 @@ public abstract class Storage {
}
return new LongStorage(data);
}
/**
* Write the value at the specified index into an XLSX cell.
*
* @param index the index to read value at. {@link #isNa(long)} must return false for this index.
* @param cell the cell to write data to
* @param writeCell a callback to delegate writes back to Enso code
*/
public abstract void writeSpreadsheetCell(
int index, Cell cell, BiConsumer<Object, Cell> writeCell);
}

View File

@ -2,9 +2,6 @@ package org.enso.table.data.column.storage;
import java.util.BitSet;
import java.util.Comparator;
import java.util.function.BiConsumer;
import java.util.function.Function;
import org.apache.poi.ss.usermodel.Cell;
import org.enso.table.data.column.builder.object.StringBuilder;
import org.enso.table.data.column.operation.map.MapOpStorage;
import org.enso.table.data.column.operation.map.MapOperation;
@ -71,7 +68,7 @@ public class StringStorage extends ObjectStorage {
@Override
public StringStorage applyMask(OrderMask mask) {
ObjectStorage storage = super.applyMask(mask);
return new StringStorage(storage.getData(), (int) storage.size());
return new StringStorage(storage.getData(), storage.size());
}
@Override
@ -146,9 +143,4 @@ public class StringStorage extends ObjectStorage {
ObjectStorage storage = super.slice(offset, limit);
return new StringStorage(storage.getData(), storage.size());
}
@Override
public void writeSpreadsheetCell(int index, Cell cell, BiConsumer<Object, Cell> writeCell) {
cell.setCellValue(getItem(index));
}
}

View File

@ -0,0 +1,7 @@
package org.enso.table.error;
/** Checked exception carrying a textual description of an "existing data" failure. */
public class ExistingDataException extends Exception {
  /**
   * Creates the exception.
   *
   * @param message description of the failure, available through {@link #getMessage()}
   */
  public ExistingDataException(String message) {
    super(message);
  }
}

View File

@ -0,0 +1,7 @@
package org.enso.table.error;
/** Checked exception carrying a textual description of an "invalid location" failure. */
public class InvalidLocationException extends Exception {
  /**
   * Creates the exception.
   *
   * @param message description of the failure, available through {@link #getMessage()}
   */
  public InvalidLocationException(String message) {
    super(message);
  }
}

View File

@ -0,0 +1,7 @@
package org.enso.table.error;
/** Checked exception carrying a textual description of a "range exceeded" failure. */
public class RangeExceededException extends Exception {
  /**
   * Creates the exception.
   *
   * @param message description of the failure, available through {@link #getMessage()}
   */
  public RangeExceededException(String message) {
    super(message);
  }
}

View File

@ -5,7 +5,6 @@ import org.apache.poi.ss.usermodel.CellType;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.ss.util.CellReference;
import org.enso.table.problems.Problem;
import org.enso.table.read.ExcelReader;
import org.enso.table.util.NameDeduplicator;
import java.util.List;
@ -15,7 +14,7 @@ public class ExcelHeaders {
private final int startCol;
private final String[] names;
public ExcelHeaders(ExcelReader.HeaderBehavior headers, ExcelRow startRow, ExcelRow nextRow, int startCol, int endCol) {
public ExcelHeaders(HeaderBehavior headers, ExcelRow startRow, ExcelRow nextRow, int startCol, int endCol) {
deduplicator = new NameDeduplicator();
this.startCol = startCol;
@ -75,32 +74,27 @@ public class ExcelHeaders {
return null;
}
String[] rowNames = getCellsAsText(row, startCol, endCol);
String[] rowNames = row.getCellsAsText(startCol, endCol);
if (rowNames == null) {
return null;
}
String[] nextNames = getCellsAsText(nextRow, startCol, endCol);
if (nextNames != null) {
if (nextRow.getCellsAsText(startCol, endCol) != null) {
return null;
}
return deduplicator.makeUnique(rowNames);
}
private static String[] getCellsAsText(ExcelRow row, int startCol, int endCol) {
int currentEndCol = endCol == -1 ? row.getLastColumn() : endCol;
/** Specifies how to set the headers for the returned table. */
public enum HeaderBehavior {
/** Tries to infer if the headers are present in the file. */
INFER,
String[] output = new String[currentEndCol - startCol + 1];
for (int col = startCol; col <= currentEndCol; col++) {
Cell cell = row.get(col);
CellType type = ExcelRow.getCellType(cell);
if (type != CellType._NONE && type != CellType.STRING) {
return null;
}
output[col - startCol] = type == CellType.STRING && cell != null ? cell.getStringCellValue() : "";
}
/** Uses the first row in the file as headers. Duplicate names will have suffixes appended. */
USE_FIRST_ROW_AS_HEADERS,
return output;
/** Uses the default Excel Column Names (e.g. A, B, C). */
EXCEL_COLUMN_NAMES
}
}

View File

@ -1,5 +1,6 @@
package org.enso.table.excel;
import org.apache.poi.ss.usermodel.CellType;
import org.apache.poi.ss.util.CellReference;
import java.util.Optional;
@ -147,15 +148,67 @@ public class ExcelRange {
return parsed.value;
}
/**
 * Builds a range that spans whole columns of a sheet.
 *
 * @param sheetName name of the containing sheet.
 * @param leftColumn left-hand column index (1-based).
 * @param rightColumn inclusive right-hand column index (1-based).
 * @return ExcelRange covering the given columns, with both row bounds set to the
 *     whole-row-or-column marker.
 */
public static ExcelRange forColumns(String sheetName, int leftColumn, int rightColumn) {
  final int topRow = WHOLE_ROW_OR_COLUMN;
  final int bottomRow = WHOLE_ROW_OR_COLUMN;
  return new ExcelRange(sheetName, leftColumn, topRow, rightColumn, bottomRow);
}
/**
 * Builds a range that spans whole rows of a sheet.
 *
 * @param sheetName name of the containing sheet.
 * @param topRow top row index (1-based).
 * @param bottomRow inclusive bottom row index (1-based).
 * @return ExcelRange covering the given rows, with both column bounds set to the
 *     whole-row-or-column marker.
 */
public static ExcelRange forRows(String sheetName, int topRow, int bottomRow) {
  final int leftColumn = WHOLE_ROW_OR_COLUMN;
  final int rightColumn = WHOLE_ROW_OR_COLUMN;
  return new ExcelRange(sheetName, leftColumn, topRow, rightColumn, bottomRow);
}
/**
 * Grows a single-cell range into the block of connected, non-empty cells that starts at that
 * cell.
 *
 * <p>If the starting cell is empty (or its row does not exist) the result is just that one cell.
 * Otherwise rows are scanned downwards; the scan stops at the first missing row or the first row
 * that is empty across the columns found so far.
 *
 * @param excelRange Range referring to the top left cell.
 * @param sheet ExcelSheet that the range refers to.
 * @return Expanded range covering the connected table of cells.
 */
public static ExcelRange expandSingleCell(ExcelRange excelRange, ExcelSheet sheet) {
  final String sheetName = excelRange.getSheetName();
  final int left = excelRange.getLeftColumn();
  final int top = excelRange.getTopRow();

  ExcelRow row = sheet.get(top);
  if (row == null || row.isEmpty(left)) {
    // Nothing to expand into: keep the range as the single starting cell.
    return new ExcelRange(sheetName, left, top, left, top);
  }

  int right = left;
  // Index of the row currently being inspected; one past the last accepted row on exit.
  int rowIndex = top;
  while (row != null && !row.isEmpty(left, right)) {
    // The right edge only ever widens as further rows are visited.
    right = row.findEndRight(right);
    rowIndex++;
    row = sheet.get(rowIndex);
  }

  return new ExcelRange(sheetName, left, top, right, rowIndex - 1);
}
/**
* @param index Index to the next character after the parsed value
* @param value Parsed integer value or 0 if not valid
@ -251,6 +304,10 @@ public class ExcelRange {
return rightColumn;
}
/**
 * @return number of columns in the range, or {@link Integer#MAX_VALUE} when {@code isWholeRow()}
 *     reports that the range is not bounded by columns.
 */
public int getColumnCount() {
  if (isWholeRow()) {
    return Integer.MAX_VALUE;
  }
  // Both bounds are inclusive.
  return rightColumn - leftColumn + 1;
}
public boolean isWholeColumn() {
return topRow == WHOLE_ROW_OR_COLUMN;
}
@ -263,6 +320,10 @@ public class ExcelRange {
return bottomRow;
}
/**
 * @return number of rows in the range, or {@link Integer#MAX_VALUE} when the range is a whole
 *     column and therefore not bounded by rows.
 */
public int getRowCount() {
  if (isWholeColumn()) {
    return Integer.MAX_VALUE;
  }
  // Both bounds are inclusive.
  return bottomRow - topRow + 1;
}
public boolean isSingleCell() {
return this.singleCell;
}

View File

@ -37,7 +37,12 @@ public class ExcelRow {
if (DateUtil.isCellDateFormatted(cell)) {
return cell.getLocalDateTimeCellValue().toLocalDate();
} else {
return cell.getNumericCellValue();
double dblValue = cell.getNumericCellValue();
if (dblValue == (long) dblValue) {
return (long) dblValue;
} else {
return dblValue;
}
}
case STRING:
return cell.getStringCellValue();
@ -62,12 +67,14 @@ public class ExcelRow {
}
public boolean isEmpty(int column) {
return isEmpty(column, column);
CellType cellType = getCellType(get(column));
return (cellType == CellType._NONE) || (cellType == CellType.BLANK);
}
public boolean isEmpty(int start, int end) {
for (int column = start; column <= end; column++) {
if (getCellType(get(column)) != CellType._NONE) {
int currentEnd = end == -1 ? getLastColumn() : end;
for (int column = start; column <= currentEnd; column++) {
if (!isEmpty(column)) {
return false;
}
}
@ -76,9 +83,26 @@ public class ExcelRow {
public int findEndRight(int start) {
int column = start;
while (getCellType(get(column + 1)) != CellType._NONE) {
while (!isEmpty(column + 1)) {
column++;
}
return column;
}
/**
 * Reads a run of cells from this row as text.
 *
 * @param startCol first column to read.
 * @param endCol inclusive last column to read, or {@code -1} to read up to {@code
 *     getLastColumn()}.
 * @return one entry per column, with string cells giving their value and missing or blank cells
 *     giving an empty string; {@code null} if any cell in the run has another type.
 */
public String[] getCellsAsText(int startCol, int endCol) {
  final int lastCol = (endCol == -1) ? getLastColumn() : endCol;
  final String[] texts = new String[lastCol - startCol + 1];

  int slot = 0;
  for (int col = startCol; col <= lastCol; col++, slot++) {
    final Cell cell = get(col);
    final CellType type = ExcelRow.getCellType(cell);

    if (type == CellType.STRING) {
      texts[slot] = cell != null ? cell.getStringCellValue() : "";
    } else if (type == CellType._NONE || type == CellType.BLANK) {
      texts[slot] = "";
    } else {
      // A non-text cell means the run cannot be represented as text.
      return null;
    }
  }

  return texts;
}
}

View File

@ -28,4 +28,8 @@ public class ExcelSheet {
Row underlyingRow = row < firstRow || row > lastRow ? null : sheet.getRow(row - 1);
return underlyingRow == null ? null : new ExcelRow(underlyingRow);
}
/** Returns the {@code Sheet} held in this wrapper's {@code sheet} field. */
public Sheet getSheet() {
return sheet;
}
}

View File

@ -1,189 +0,0 @@
package org.enso.table.format.xlsx;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.CellType;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.enso.table.data.table.Table;
import org.enso.table.format.util.FileSplitter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.Arrays;
import java.util.function.BiConsumer;
/** Writer for XLSX files. */
public class Writer {
/** Specifies write behavior for files that already exist. */
public enum WriteMode {
/** Append new contents to the existing sheet. */
APPEND,
/** Remove old contents and replace with new. */
OVERWRITE_SHEET,
/** Create a new sheet, avoiding a name clash. */
CREATE_SHEET
}
/**
 * Writes a table to XLSX, spreading it over several files when it holds more rows than {@code
 * maxRecords}.
 *
 * @param table the table
 * @param path the path to the xlsx file
 * @param sheetName the name of the sheet
 * @param writeMode what to do when the named sheet already exists
 * @param writeHeader whether the first row should contain column names
 * @param maxRecords the max number of records that can be written to a single file; null means
 *     no limit
 * @param writeCell a helper for writing arbitrary objects into XLSX cells
 * @throws IOException when any of the files cannot be read.
 * @throws InvalidFormatException when the specified file exists, but is not an XLSX file.
 */
public static void writeXlsx(
    Table table,
    String path,
    String sheetName,
    WriteMode writeMode,
    boolean writeHeader,
    Integer maxRecords,
    BiConsumer<Object, Cell> writeCell)
    throws IOException, InvalidFormatException {
  boolean fitsInSingleFile = maxRecords == null || maxRecords >= table.rowCount();
  if (fitsInSingleFile) {
    File target = new File(path);
    writeXlsx(table, target, sheetName, writeMode, writeHeader, 0, table.rowCount(), writeCell);
    return;
  }

  // Too many rows for one file: let the splitter name the files and write one chunk to each.
  var splitter = new FileSplitter(table.rowCount(), maxRecords, new File(path));
  int fileIndex = 0;
  while (fileIndex < splitter.getNumberOfFiles()) {
    File chunkFile = splitter.getFile(fileIndex);
    int firstRecord = fileIndex * maxRecords;
    writeXlsx(
        table, chunkFile, sheetName, writeMode, writeHeader, firstRecord, maxRecords, writeCell);
    fileIndex++;
  }
}
/**
 * Opens the workbook stored in {@code file}, or creates an empty one when the file is absent.
 *
 * @param file the XLSX file to load.
 * @return the loaded workbook, or a new empty {@code XSSFWorkbook}.
 * @throws IOException when the existing file cannot be read.
 */
private static Workbook workbookForFile(File file) throws IOException, InvalidFormatException {
  if (!file.exists()) {
    return new XSSFWorkbook();
  }

  // The stream is only needed while loading; the workbook is held in memory afterwards.
  try (var input = new FileInputStream(file)) {
    return new XSSFWorkbook(input);
  }
}
/**
 * Loads (or creates) the workbook for {@code file}, writes the requested slice of the table into
 * it and saves the workbook back to the same file.
 *
 * @param startRecord index of the first table row to write.
 * @param numRecords maximum number of table rows to write.
 */
private static void writeXlsx(
    Table table,
    File file,
    String sheetName,
    WriteMode writeMode,
    boolean writeHeader,
    int startRecord,
    int numRecords,
    BiConsumer<Object, Cell> writeCell)
    throws IOException, InvalidFormatException {
  try (var book = workbookForFile(file)) {
    writeWorkbook(
        table, book, sheetName, writeMode, writeHeader, startRecord, numRecords, writeCell);

    // Open the output only after the in-memory update succeeded, so a failure above does not
    // truncate the existing file.
    try (var destination = new FileOutputStream(file)) {
      book.write(destination);
    }
  }
}
/**
 * Writes a slice of the table into the named sheet of {@code workbook}.
 *
 * <p>A missing sheet is created and written from the top-left corner. For an existing sheet the
 * behavior is selected by {@code writeMode}, and the workbook is flagged for formula
 * recalculation afterwards.
 */
private static void writeWorkbook(
    Table table,
    Workbook workbook,
    String sheetName,
    WriteMode writeMode,
    boolean writeHeader,
    int startRecord,
    int numRecords,
    BiConsumer<Object, Cell> writeCell) {
  Sheet existing = workbook.getSheet(sheetName);
  if (existing == null) {
    Sheet created = workbook.createSheet(sheetName);
    writeSheet(table, created, writeHeader, startRecord, numRecords, 0, 0, writeCell);
    return;
  }

  switch (writeMode) {
    case APPEND:
      {
        // Continue on the row directly below the current last row.
        int firstFreeRow = existing.getLastRowNum() + 1;
        writeSheet(
            table, existing, writeHeader, startRecord, numRecords, firstFreeRow, 0, writeCell);
        workbook.setForceFormulaRecalculation(true);
        return;
      }
    case OVERWRITE_SHEET:
      {
        // Strip rows from the bottom up until the sheet reports no rows.
        for (int last = existing.getLastRowNum(); last != -1; last = existing.getLastRowNum()) {
          existing.removeRow(existing.getRow(last));
        }
        writeSheet(table, existing, writeHeader, startRecord, numRecords, 0, 0, writeCell);
        workbook.setForceFormulaRecalculation(true);
        return;
      }
    case CREATE_SHEET:
      {
        // Probe "<name> 1", "<name> 2", ... until an unused sheet name is found.
        int suffix = 1;
        String candidate = sheetName + " " + suffix;
        while (workbook.getSheet(candidate) != null) {
          suffix++;
          candidate = sheetName + " " + suffix;
        }
        Sheet fresh = workbook.createSheet(candidate);
        writeSheet(table, fresh, writeHeader, startRecord, numRecords, 0, 0, writeCell);
        workbook.setForceFormulaRecalculation(true);
        return;
      }
  }
}
/**
 * Writes a slice of the table into {@code sheet}, starting at the given cell.
 *
 * @param table the table to write; its index column (when present) is written first.
 * @param sheet the sheet receiving the data.
 * @param writeHeader whether to emit a row of column names before the data.
 * @param startRecord index of the first table row to write.
 * @param numRecords maximum number of table rows to write.
 * @param startRow 0-based sheet row where writing begins.
 * @param startCol 0-based sheet column where writing begins.
 * @param writeCell helper used by the storage to write arbitrary objects into cells.
 */
private static void writeSheet(
    Table table,
    Sheet sheet,
    boolean writeHeader,
    int startRecord,
    int numRecords,
    int startRow,
    int startCol,
    BiConsumer<Object, Cell> writeCell) {
  // Arrays.asList returns a fixed-size list backed by the array, so inserting the index column
  // into it throws UnsupportedOperationException. Copy into a growable list first.
  var columns = new java.util.ArrayList<>(Arrays.asList(table.getColumns()));
  var index = table.getIndex().toColumn();
  if (index != null) {
    columns.add(0, index);
  }

  if (writeHeader) {
    var row = sheet.createRow(startRow);
    startRow++;
    for (int j = 0; j < columns.size(); j++) {
      var cell = row.createCell(startCol + j, CellType.STRING);
      cell.setCellValue(columns.get(j).getName());
    }
  }

  // Never read past the end of the table, even if the caller asked for more records.
  var rowLimit = Math.min(numRecords, table.rowCount() - startRecord);
  for (int i = 0; i < rowLimit; i++) {
    var row = sheet.createRow(startRow + i);
    for (int j = 0; j < columns.size(); j++) {
      var cell = row.createCell(startCol + j);
      var storage = columns.get(j).getStorage();
      if (storage.isNa(startRecord + i)) {
        cell.setBlank();
      } else {
        storage.writeSpreadsheetCell(startRecord + i, cell, writeCell);
      }
    }
  }
}
}

View File

@ -1,7 +1,8 @@
package org.enso.table.read;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.ss.usermodel.*;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.Name;
import org.apache.poi.ss.util.CellReference;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.enso.table.data.column.builder.object.Builder;
@ -9,6 +10,7 @@ import org.enso.table.data.column.builder.object.InferredBuilder;
import org.enso.table.data.column.storage.ObjectStorage;
import org.enso.table.data.table.Column;
import org.enso.table.data.table.Table;
import org.enso.table.error.InvalidLocationException;
import org.enso.table.excel.ExcelHeaders;
import org.enso.table.excel.ExcelRange;
import org.enso.table.excel.ExcelRow;
@ -67,20 +69,21 @@ public class ExcelReader {
* @param xls_format specifies whether the file is in Excel Binary Format (95-2003 format).
* @return a {@link Table} containing the specified data.
* @throws IOException when the input stream cannot be read.
* @throws InvalidLocationException when the sheet name is not found.
*/
public static WithProblems<Table> readSheetByName(
InputStream stream,
String sheetName,
HeaderBehavior headers,
ExcelHeaders.HeaderBehavior headers,
int skip_rows,
Integer row_limit,
boolean xls_format)
throws IOException, IllegalArgumentException {
throws IOException, InvalidLocationException {
Workbook workbook = getWorkbook(stream, xls_format);
int sheetIndex = getSheetIndex(workbook, sheetName);
int sheetIndex = workbook.getSheetIndex(sheetName);
if (sheetIndex == -1) {
throw new IllegalArgumentException("Unknown sheet '" + sheetName + "'.");
throw new InvalidLocationException("Unknown sheet '" + sheetName + "'.");
}
return readTable(
@ -102,20 +105,21 @@ public class ExcelReader {
* @param xls_format specifies whether the file is in Excel Binary Format (95-2003 format).
* @return a {@link Table} containing the specified data.
* @throws IOException when the input stream cannot be read.
* @throws InvalidLocationException when the sheet index is not valid.
*/
public static WithProblems<Table> readSheetByIndex(
InputStream stream,
int index,
HeaderBehavior headers,
ExcelHeaders.HeaderBehavior headers,
int skip_rows,
Integer row_limit,
boolean xls_format)
throws IOException, IllegalArgumentException {
throws IOException, InvalidLocationException {
Workbook workbook = getWorkbook(stream, xls_format);
int sheetCount = workbook.getNumberOfSheets();
if (index < 1 || index > sheetCount) {
throw new IllegalArgumentException(
throw new InvalidLocationException(
"Sheet index is not in valid range (1 to " + sheetCount + " inclusive).");
}
@ -138,20 +142,28 @@ public class ExcelReader {
* @param xls_format specifies whether the file is in Excel Binary Format (95-2003 format).
* @return a {@link Table} containing the specified data.
* @throws IOException when the input stream cannot be read.
* @throws InvalidLocationException when the range name or address is not found.
*/
public static WithProblems<Table> readRangeByName(
InputStream stream,
String rangeNameOrAddress,
HeaderBehavior headers,
ExcelHeaders.HeaderBehavior headers,
int skip_rows,
Integer row_limit,
boolean xls_format)
throws IOException {
throws IOException, InvalidLocationException {
Workbook workbook = getWorkbook(stream, xls_format);
Name name = workbook.getName(rangeNameOrAddress);
ExcelRange excelRange =
new ExcelRange(name == null ? rangeNameOrAddress : name.getRefersToFormula());
ExcelRange excelRange;
try {
excelRange = new ExcelRange(name == null ? rangeNameOrAddress : name.getRefersToFormula());
} catch (IllegalArgumentException e) {
throw new InvalidLocationException(
"Invalid range name or address '" + rangeNameOrAddress + "'.");
}
return readRange(workbook, excelRange, headers, skip_rows, row_limit);
}
@ -169,39 +181,36 @@ public class ExcelReader {
public static WithProblems<Table> readRange(
InputStream stream,
ExcelRange excelRange,
HeaderBehavior headers,
ExcelHeaders.HeaderBehavior headers,
int skip_rows,
Integer row_limit,
boolean xls_format)
throws IOException {
throws IOException, InvalidLocationException {
return readRange(getWorkbook(stream, xls_format), excelRange, headers, skip_rows, row_limit);
}
/** Specifies how to set the headers for the returned table. */
public enum HeaderBehavior {
/** Tries to infer whether headers are present in the file. */
INFER,
/** Uses the first row in the file as headers. Duplicate names have suffixes appended. */
USE_FIRST_ROW_AS_HEADERS,
/** Uses the default Excel Column Names (e.g. A, B, C). */
EXCEL_COLUMN_NAMES
}
private static Workbook getWorkbook(InputStream stream, boolean xls_format) throws IOException {
/**
* Load a workbook into memory from an InputStream.
*
* @param stream an {@link InputStream} allowing to read the XLSX file contents.
* @param xls_format specifies whether the file is in Excel Binary Format (95-2003 format).
* @return a {@link Workbook} containing the specified data.
* @throws IOException when the input stream cannot be read or an incorrect format occurs.
*/
public static Workbook getWorkbook(InputStream stream, boolean xls_format) throws IOException {
return xls_format ? new HSSFWorkbook(stream) : new XSSFWorkbook(stream);
}
private static WithProblems<Table> readRange(
Workbook workbook,
ExcelRange excelRange,
HeaderBehavior headers,
ExcelHeaders.HeaderBehavior headers,
int skip_rows,
Integer row_limit) {
int sheetIndex = getSheetIndex(workbook, excelRange.getSheetName());
Integer row_limit)
throws InvalidLocationException {
int sheetIndex = workbook.getSheetIndex(excelRange.getSheetName());
if (sheetIndex == -1) {
throw new IllegalArgumentException("Unknown sheet '" + excelRange.getSheetName() + "'.");
throw new InvalidLocationException("Unknown sheet '" + excelRange.getSheetName() + "'.");
}
return readTable(
@ -217,7 +226,7 @@ public class ExcelReader {
Workbook workbook,
int sheetIndex,
ExcelRange excelRange,
HeaderBehavior headers,
ExcelHeaders.HeaderBehavior headers,
int skipRows,
int rowCount) {
ExcelSheet sheet = new ExcelSheet(workbook, sheetIndex);
@ -236,7 +245,7 @@ public class ExcelReader {
Collections.emptyList());
}
excelRange = expandSingleCell(excelRange, sheet, currentRow);
excelRange = ExcelRange.expandSingleCell(excelRange, sheet);
}
// Row Range
@ -306,26 +315,6 @@ public class ExcelReader {
return new WithProblems<>(new Table(columns), excelHeaders.getProblems());
}
/**
 * Grows a single-cell range into the block of data that starts at that cell.
 *
 * <p>Rows are consumed downwards while the span found so far is not empty; within each row the
 * right edge is pushed out via {@code findEndRight}.
 *
 * @param excelRange the starting range; its top-left cell anchors the result.
 * @param sheet sheet used to fetch following rows.
 * @param currentRow the row at the top of the range (may be null).
 * @return a new range with the same sheet name and top-left corner and the discovered extent.
 */
private static ExcelRange expandSingleCell(
    ExcelRange excelRange, ExcelSheet sheet, ExcelRow currentRow) {
  final int left = excelRange.getLeftColumn();
  final int top = excelRange.getTopRow();

  int right = left;
  int nextRow = top;
  for (ExcelRow row = currentRow;
      row != null && !row.isEmpty(left, right);
      row = sheet.get(nextRow)) {
    right = row.findEndRight(right);
    nextRow++;
  }

  // nextRow points one past the last non-empty row.
  return new ExcelRange(excelRange.getSheetName(), left, top, right, nextRow - 1);
}
private static void expandBuilders(List<Builder> builders, int size, int columnCount, int rows) {
for (int i = builders.size(); i <= columnCount; i++) {
Builder builder = new InferredBuilder(size);
@ -333,14 +322,4 @@ public class ExcelReader {
builders.add(builder);
}
}
/**
 * Finds a sheet by name, ignoring case.
 *
 * @param workbook the workbook to search.
 * @param sheetName the name to look for.
 * @return the 0-based position of the first matching sheet, or -1 when none matches.
 */
private static int getSheetIndex(Workbook workbook, String sheetName) {
  final int total = workbook.getNumberOfSheets();
  int position = 0;
  while (position < total) {
    String candidate = workbook.getSheetName(position);
    if (candidate.equalsIgnoreCase(sheetName)) {
      return position;
    }
    position++;
  }
  return -1;
}
}

View File

@ -0,0 +1,324 @@
package org.enso.table.write;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Name;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.CellStyle;
import org.apache.poi.ss.usermodel.CellType;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.enso.table.data.column.storage.BoolStorage;
import org.enso.table.data.column.storage.DoubleStorage;
import org.enso.table.data.column.storage.LongStorage;
import org.enso.table.data.column.storage.Storage;
import org.enso.table.data.table.Column;
import org.enso.table.data.table.Table;
import org.enso.table.error.ExistingDataException;
import org.enso.table.error.RangeExceededException;
import org.enso.table.error.InvalidLocationException;
import org.enso.table.excel.ExcelHeaders;
import org.enso.table.excel.ExcelRange;
import org.enso.table.excel.ExcelRow;
import org.enso.table.excel.ExcelSheet;
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.time.LocalTime;
import java.util.Arrays;
import java.util.function.Function;
public class ExcelWriter {
private static final double SECONDS_IN_A_DAY = 86400.0;
private static Function<Object, Boolean> ensoToTextCallback;
/**
 * Returns the currently registered callback used by {@code writeValueToCell} for values it has
 * no dedicated handling for; null when none has been registered.
 */
public static Function<Object, Boolean> getEnsoToTextCallback() {
return ensoToTextCallback;
}
/**
 * Registers the callback used to convert values that have no dedicated cell representation.
 *
 * @param callback the conversion callback; null clears the registration.
 */
public static void setEnsoToTextCallback(Function<Object, Boolean> callback) {
  ensoToTextCallback = callback;
}

/**
 * Misnamed setter, retained so existing callers keep working.
 *
 * @param callback the conversion callback; null clears the registration.
 * @deprecated use {@link #setEnsoToTextCallback(Function)}; this overload only shares a name
 *     with the getter and does not return anything.
 */
@Deprecated
public static void getEnsoToTextCallback(Function<Object, Boolean> callback) {
  setEnsoToTextCallback(callback);
}
/**
 * Writes a table to the sheet at a 1-based position in the workbook.
 *
 * <p>An index of 0, or one greater than the number of sheets, creates a new sheet named
 * "Sheet" + n (first unused n); with index 0 the new sheet is moved to the front. An existing
 * sheet is removed and recreated under the same name when {@code replace} is true.
 *
 * @param workbook the workbook to modify.
 * @param sheetIndex 1-based sheet position (0 means "new sheet at the start").
 * @param replace whether an existing sheet may be replaced.
 * @param firstRow 0-based row at which writing starts.
 * @param table the table to write.
 * @param rowLimit maximum number of rows to write, or null for no limit.
 * @param headers header behavior; {@code INFER} inspects the existing sheet before it is replaced.
 * @throws ExistingDataException when the sheet exists and {@code replace} is false.
 */
public static void writeTableToSheet(Workbook workbook, int sheetIndex, boolean replace, int firstRow, Table table, Long rowLimit, ExcelHeaders.HeaderBehavior headers)
    throws ExistingDataException, IllegalStateException {
  if (sheetIndex == 0 || sheetIndex > workbook.getNumberOfSheets()) {
    int i = 1;
    while (workbook.getSheet("Sheet" + i) != null) {
      i++;
    }

    Sheet sheet = workbook.createSheet("Sheet" + i);
    if (sheetIndex == 0) {
      workbook.setSheetOrder(sheet.getSheetName(), 0);
    }

    writeTableToSheet(workbook, sheet, firstRow, 1, table, rowLimit, headers != ExcelHeaders.HeaderBehavior.EXCEL_COLUMN_NAMES);
  } else if (replace) {
    // Workbook positions are 0-based while sheetIndex is 1-based.
    int position = sheetIndex - 1;

    if (headers == ExcelHeaders.HeaderBehavior.INFER) {
      // Inspect the sheet being replaced: ExcelSheet takes the same 0-based position that
      // getSheetName/removeSheetAt use, so passing sheetIndex would look at the following sheet
      // (or fail for the last one).
      ExcelSheet excelSheet = new ExcelSheet(workbook, position);
      headers = shouldWriteHeaders(excelSheet, firstRow + 1, 1, -1)
          ? ExcelHeaders.HeaderBehavior.USE_FIRST_ROW_AS_HEADERS
          : ExcelHeaders.HeaderBehavior.EXCEL_COLUMN_NAMES;
    }

    // Replace by dropping the sheet and recreating it under the same name at the same position.
    String sheetName = workbook.getSheetName(position);
    workbook.removeSheetAt(position);
    Sheet sheet = workbook.createSheet(sheetName);
    workbook.setSheetOrder(sheetName, position);
    writeTableToSheet(workbook, sheet, firstRow, 1, table, rowLimit, headers != ExcelHeaders.HeaderBehavior.EXCEL_COLUMN_NAMES);
  } else {
    throw new ExistingDataException("Sheet already exists, and cannot be replaced in current mode.");
  }
}
/**
 * Writes a table to the sheet with the given name, creating the sheet when it does not exist.
 *
 * @param workbook the workbook to modify.
 * @param sheetName name of the target sheet.
 * @param replace whether an existing sheet may be replaced.
 * @param firstRow 0-based row at which writing starts.
 * @param table the table to write.
 * @param rowLimit maximum number of rows to write, or null for no limit.
 * @param headers header behavior; {@code INFER} inspects the existing sheet before it is replaced.
 * @throws ExistingDataException when the sheet exists and {@code replace} is false.
 */
public static void writeTableToSheet(Workbook workbook, String sheetName, boolean replace, int firstRow, Table table, Long rowLimit, ExcelHeaders.HeaderBehavior headers)
    throws ExistingDataException, IllegalStateException {
  // An empty workbook is never asked for the sheet position.
  int existingPosition = -1;
  if (workbook.getNumberOfSheets() != 0) {
    existingPosition = workbook.getSheetIndex(sheetName);
  }

  if (existingPosition == -1) {
    Sheet created = workbook.createSheet(sheetName);
    writeTableToSheet(workbook, created, firstRow, 1, table, rowLimit, headers != ExcelHeaders.HeaderBehavior.EXCEL_COLUMN_NAMES);
    return;
  }

  if (!replace) {
    throw new ExistingDataException("Sheet '" + sheetName + "' already exists, and cannot be replaced in current mode.");
  }

  ExcelHeaders.HeaderBehavior effectiveHeaders = headers;
  if (effectiveHeaders == ExcelHeaders.HeaderBehavior.INFER) {
    // Decide from the current contents, before the sheet is dropped.
    ExcelSheet current = new ExcelSheet(workbook, existingPosition);
    boolean writeHeaders = shouldWriteHeaders(current, firstRow + 1, 1, -1);
    effectiveHeaders = writeHeaders
        ? ExcelHeaders.HeaderBehavior.USE_FIRST_ROW_AS_HEADERS
        : ExcelHeaders.HeaderBehavior.EXCEL_COLUMN_NAMES;
  }

  workbook.removeSheetAt(existingPosition);
  Sheet recreated = workbook.createSheet(sheetName);
  workbook.setSheetOrder(sheetName, existingPosition);
  writeTableToSheet(workbook, recreated, firstRow, 1, table, rowLimit, effectiveHeaders != ExcelHeaders.HeaderBehavior.EXCEL_COLUMN_NAMES);
}
/**
 * Writes a table to a range given either as a defined name or as a range address.
 *
 * @param workbook the workbook to modify.
 * @param rangeNameOrAddress a defined name in the workbook, or a range address.
 * @throws InvalidLocationException when the text cannot be parsed into a range.
 */
public static void writeTableToRange(Workbook workbook, String rangeNameOrAddress, boolean replace, int skipRows, Table table, Long rowLimit, ExcelHeaders.HeaderBehavior headers)
    throws InvalidLocationException, IllegalStateException, RangeExceededException, ExistingDataException {
  Name definedName = workbook.getName(rangeNameOrAddress);

  ExcelRange target;
  try {
    // Prefer the formula behind a defined name; fall back to treating the text as an address.
    String address = definedName == null ? rangeNameOrAddress : definedName.getRefersToFormula();
    target = new ExcelRange(address);
  } catch (IllegalArgumentException e) {
    throw new InvalidLocationException("Invalid range name or address '" + rangeNameOrAddress + "'.");
  }

  writeTableToRange(workbook, target, replace, skipRows, table, rowLimit, headers);
}
/**
 * Writes a table into the given range of an existing sheet.
 *
 * <p>A single-cell range is expanded to the data block found at that cell and, if needed, grown
 * to fit the table. Any other range must already be large enough for the table.
 *
 * @param workbook the workbook to modify.
 * @param range the target range; its sheet must exist.
 * @param replace whether existing cells in the target area may be blanked.
 * @param skipRows number of rows to skip from the top of the range before writing.
 * @param table the table to write.
 * @param rowLimit maximum number of rows to write, or null for no limit.
 * @param headers header behavior; {@code INFER} inspects the existing contents.
 * @throws InvalidLocationException when the range's sheet does not exist.
 * @throws RangeExceededException when a fixed range is too small for the table.
 * @throws ExistingDataException when the area holds cells and {@code replace} is false.
 */
public static void writeTableToRange(Workbook workbook, ExcelRange range, boolean replace, int skipRows, Table table, Long rowLimit, ExcelHeaders.HeaderBehavior headers)
    throws InvalidLocationException, IllegalStateException, RangeExceededException, ExistingDataException {
  int sheetIndex = workbook.getSheetIndex(range.getSheetName());
  if (sheetIndex == -1) {
    throw new InvalidLocationException("Unknown sheet '" + range.getSheetName() + "'.");
  }
  ExcelSheet sheet = new ExcelSheet(workbook, sheetIndex);

  if (skipRows != 0) {
    // The five-argument ExcelRange constructor takes (sheet, left, top, right, bottom), exactly as
    // it is called when growing the expanded range further down. Rows and columns must not be
    // swapped when the range is rebuilt here.
    if (range.isWholeColumn()) {
      range = new ExcelRange(range.getSheetName(), range.getLeftColumn(), skipRows + 1, range.getRightColumn(), workbook.getSpreadsheetVersion().getMaxRows());
    } else if (range.isSingleCell()) {
      // NOTE(review): the three-argument constructor is not visible here. If it is declared as
      // (sheet, column, row) like the five-argument form, these two arguments are swapped; confirm.
      range = new ExcelRange(range.getSheetName(), range.getTopRow() + skipRows, range.getLeftColumn());
    } else {
      range = new ExcelRange(range.getSheetName(), range.getLeftColumn(), range.getTopRow() + skipRows, range.getRightColumn(), range.getBottomRow());
    }
  }

  if (range.isSingleCell()) {
    ExcelRange expanded = ExcelRange.expandSingleCell(range, sheet);

    if (headers == ExcelHeaders.HeaderBehavior.INFER) {
      headers = shouldWriteHeaders(sheet, expanded.getTopRow(), expanded.getLeftColumn(), expanded.getRightColumn())
          ? ExcelHeaders.HeaderBehavior.USE_FIRST_ROW_AS_HEADERS
          : ExcelHeaders.HeaderBehavior.EXCEL_COLUMN_NAMES;
    }

    // Expand to cover required size
    int rowCount = (headers == ExcelHeaders.HeaderBehavior.USE_FIRST_ROW_AS_HEADERS ? 1 : 0) + table.rowCount();
    if (expanded.getColumnCount() < table.getColumns().length || expanded.getRowCount() < rowCount) {
      expanded = new ExcelRange(
          expanded.getSheetName(),
          expanded.getLeftColumn(),
          expanded.getTopRow(),
          Math.max(expanded.getRightColumn(), expanded.getLeftColumn() + table.getColumns().length - 1),
          Math.max(expanded.getBottomRow(), expanded.getTopRow() + rowCount - 1));
    }

    // Everything the table will cover (old block plus growth) must be clear or clearable.
    checkExistingRange(workbook, expanded, replace, sheet);
  } else {
    // Check Size of Range
    int rowCount = Math.min(Math.min(workbook.getSpreadsheetVersion().getMaxRows() - range.getTopRow() + 1, rowLimit == null ? Integer.MAX_VALUE : rowLimit.intValue()), table.rowCount());
    if (range.getColumnCount() < table.getColumns().length || range.getRowCount() < rowCount) {
      throw new RangeExceededException("Range is too small to fit all columns.");
    }

    if (headers == ExcelHeaders.HeaderBehavior.INFER) {
      headers = shouldWriteHeaders(sheet, range.getTopRow(), range.getLeftColumn(), range.isWholeRow() ? -1 : range.getRightColumn())
          ? ExcelHeaders.HeaderBehavior.USE_FIRST_ROW_AS_HEADERS
          : ExcelHeaders.HeaderBehavior.EXCEL_COLUMN_NAMES;
    }

    checkExistingRange(workbook, range, replace, sheet);
  }

  // The range's top row is 1-based; the sheet writer takes a 0-based first row.
  writeTableToSheet(workbook, sheet.getSheet(), range.getTopRow() - 1, range.getLeftColumn(), table, rowLimit, headers != ExcelHeaders.HeaderBehavior.EXCEL_COLUMN_NAMES);
}
/**
 * Scans every cell of {@code range} and either blanks it or rejects the write.
 *
 * @param workbook supplies the maximum row/column counts for whole-row or whole-column ranges.
 * @param range the area to check.
 * @param replace when true existing cells are set to blank; when false the first existing cell
 *     aborts the operation.
 * @param sheet the sheet the range belongs to.
 * @throws ExistingDataException when a cell exists in the range and {@code replace} is false.
 */
private static void checkExistingRange(Workbook workbook, ExcelRange range, boolean replace, ExcelSheet sheet) throws ExistingDataException {
  final int firstRow;
  final int lastRow;
  if (range.isWholeColumn()) {
    firstRow = 1;
    lastRow = workbook.getSpreadsheetVersion().getMaxRows();
  } else {
    firstRow = range.getTopRow();
    lastRow = range.getBottomRow();
  }

  final int firstColumn;
  final int lastColumn;
  if (range.isWholeRow()) {
    firstColumn = 1;
    lastColumn = workbook.getSpreadsheetVersion().getMaxColumns();
  } else {
    firstColumn = range.getLeftColumn();
    lastColumn = range.getRightColumn();
  }

  for (int rowNumber = firstRow; rowNumber <= lastRow; rowNumber++) {
    ExcelRow excelRow = sheet.get(rowNumber);
    if (excelRow == null) {
      continue;
    }

    for (int columnNumber = firstColumn; columnNumber <= lastColumn; columnNumber++) {
      Cell cell = excelRow.get(columnNumber);
      if (cell == null) {
        continue;
      }
      if (!replace) {
        throw new ExistingDataException("Range is not empty, and cannot be replaced in current mode.");
      }
      cell.setBlank();
    }
  }
}
/**
 * Creates an empty workbook.
 *
 * @param xls_format specifies whether the file is in Excel Binary Format (95-2003 format).
 * @return a new, empty {@link Workbook}: an {@code HSSFWorkbook} for the binary format and an
 *     {@code XSSFWorkbook} otherwise.
 */
public static Workbook createWorkbook(boolean xls_format) {
  if (xls_format) {
    return new HSSFWorkbook();
  }
  return new XSSFWorkbook();
}
/**
 * Writes the table's cells (and optionally a header row) into a sheet.
 *
 * @param workbook the workbook owning the sheet; supplies the row capacity and cell styles.
 * @param sheet the sheet to write into.
 * @param firstRow 0-based row at which writing starts.
 * @param firstColumn 1-based column at which writing starts.
 * @param table the table to write.
 * @param rowLimit maximum number of data rows to write, or null for no limit.
 * @param headers whether to write the column names as a first row.
 * @throws IllegalStateException when a value cannot be converted for writing.
 */
private static void writeTableToSheet(Workbook workbook, Sheet sheet, int firstRow, int firstColumn, Table table, Long rowLimit, boolean headers)
    throws IllegalStateException {
  int currentRow = firstRow;
  Column[] columns = table.getColumns();

  if (headers) {
    // Reuse an existing row: createRow would replace it and drop cells that lie outside the
    // columns being written (for example when writing into a range of a populated sheet).
    Row row = getOrCreateRow(sheet, currentRow);
    for (int i = 0; i < columns.length; i++) {
      row.createCell(i + firstColumn - 1, CellType.STRING).setCellValue(columns[i].getName());
    }
    currentRow++;
  }

  // Capacity is measured from the first data row, so a header row counts against the sheet limit.
  int availableRows = workbook.getSpreadsheetVersion().getMaxRows() - currentRow;
  // Clamp rather than truncate: Long.intValue() would wrap for limits above Integer.MAX_VALUE.
  int limit = rowLimit == null ? Integer.MAX_VALUE : (int) Math.max(0L, Math.min(rowLimit, (long) Integer.MAX_VALUE));
  int rowCount = Math.min(Math.min(availableRows, limit), table.rowCount());

  if (rowCount <= 0) {
    return;
  }

  Storage[] storages = Arrays.stream(columns).map(Column::getStorage).toArray(Storage[]::new);
  for (int i = 0; i < rowCount; i++) {
    Row row = getOrCreateRow(sheet, currentRow);

    for (int j = 0; j < columns.length; j++) {
      Storage storage = storages[j];
      int idx = j + firstColumn - 1;

      Cell cell = row.getCell(idx);
      if (cell == null) {
        cell = row.createCell(idx);
      }

      writeValueToCell(cell, i, storage, workbook);
    }
    currentRow++;
  }

  workbook.setForceFormulaRecalculation(true);
}

/** Returns the row at {@code rowIndex}, creating it only when the sheet has no such row yet. */
private static Row getOrCreateRow(Sheet sheet, int rowIndex) {
  Row row = sheet.getRow(rowIndex);
  return row != null ? row : sheet.createRow(rowIndex);
}
/**
 * Finds a cell style in the workbook whose data format string equals {@code format}, creating a
 * new style with that format when none exists.
 *
 * @param workbook the workbook whose styles are searched and extended.
 * @param format the data format string to look for.
 * @return the first matching existing style, or a newly created one.
 */
private static CellStyle getDateTimeStyle(Workbook workbook, String format) {
  for (int i = 0; i < workbook.getNumCellStyles(); i++) {
    CellStyle style = workbook.getCellStyleAt(i);
    // Compare from the known non-null side so a style without a format string is skipped
    // instead of causing a NullPointerException.
    if (format.equals(style.getDataFormatString())) {
      return style;
    }
  }

  CellStyle newStyle = workbook.createCellStyle();
  newStyle.setDataFormat(workbook.createDataFormat().getFormat(format));
  return newStyle;
}
/**
 * Writes one value from a column storage into a cell.
 *
 * <p>Missing values become blank cells. Double, long and boolean storages are written directly.
 * For any other storage the boxed value is dispatched on its runtime type; date/time values also
 * receive a matching cell style. Values of any other type go through the registered callback.
 *
 * @param cell the cell to write into.
 * @param j index of the item within {@code storage}.
 * @param storage the column storage holding the value.
 * @param workbook the workbook, used to look up or create date/time styles.
 * @throws IllegalStateException when a value needs the callback but none is registered.
 */
private static void writeValueToCell(Cell cell, int j, Storage storage, Workbook workbook)
    throws IllegalStateException {
  if (storage.isNa(j)) {
    cell.setBlank();
  } else if (storage instanceof DoubleStorage doubleStorage) {
    cell.setCellValue(doubleStorage.getItem(j));
  } else if (storage instanceof LongStorage longStorage) {
    cell.setCellValue(longStorage.getItem(j));
  } else if (storage instanceof BoolStorage boolStorage) {
    cell.setCellValue(boolStorage.getItem(j));
  } else {
    Object value = storage.getItemBoxed(j);
    switch (value) {
      case String s -> cell.setCellValue(s);
      case Boolean b -> cell.setCellValue(b);
      case Double d -> cell.setCellValue(d);
      case Long l -> cell.setCellValue(l);
      case LocalDateTime ldt -> {
        cell.setCellValue(ldt);
        cell.setCellStyle(getDateTimeStyle(workbook, "yyyy-MM-dd HH:mm:ss"));
      }
      case LocalDate ld -> {
        cell.setCellValue(ld);
        cell.setCellStyle(getDateTimeStyle(workbook, "yyyy-MM-dd"));
      }
      case LocalTime lt -> {
        // Stored as a fraction of a day.
        cell.setCellValue(lt.toSecondOfDay() / SECONDS_IN_A_DAY);
        cell.setCellStyle(getDateTimeStyle(workbook, "HH:mm:ss"));
      }
      default -> {
        // Read the static field once so the null check and the call see the same callback.
        Function<Object, ?> callback = ensoToTextCallback;
        if (callback == null) {
          throw new IllegalStateException("Enso to text callback is not set. Unable to process value.");
        }

        // The field is declared with a Boolean result, but generics are erased at runtime. Going
        // through the wildcard type avoids the implicit cast to Boolean, which would fail with a
        // ClassCastException for a callback that produces text.
        Object converted = callback.apply(value);
        if (converted == null) {
          cell.setBlank();
        } else if (converted instanceof Boolean flag) {
          cell.setCellValue(flag);
        } else {
          cell.setCellValue(String.valueOf(converted));
        }
      }
    }
  }
}
/**
 * Decides, for {@code INFER} mode, whether a header row should be written for the given range.
 *
 * <p>Unlike in the {@code ExcelReader}, an empty or missing first row defaults to true.
 *
 * @param excelSheet the Excel sheet to check.
 * @param topRow top row index (1-based) of the range to check.
 * @param startCol start column index (1-based) of the range to check.
 * @param endCol end column index (1-based) of the range to check. If -1 will continue until end
 *     of row.
 * @return true if headers should be written: either the first row is missing/empty, or the first
 *     row is entirely text and the row below it exists and is not entirely text.
 */
private static boolean shouldWriteHeaders(ExcelSheet excelSheet, int topRow, int startCol, int endCol) {
  ExcelRow firstRow = excelSheet.get(topRow);

  // Nothing there yet: writing headers is the default.
  boolean firstRowMissingOrEmpty = firstRow == null || firstRow.isEmpty(startCol, endCol);
  if (firstRowMissingOrEmpty) {
    return true;
  }

  // A first row containing anything other than text cannot be a header row.
  boolean firstRowIsAllText = firstRow.getCellsAsText(startCol, endCol) != null;
  if (!firstRowIsAllText) {
    return false;
  }

  // The first row only counts as headers when the row below exists and holds non-text data.
  ExcelRow secondRow = excelSheet.get(topRow + 1);
  if (secondRow == null) {
    return false;
  }
  return secondRow.getCellsAsText(startCol, endCol) == null;
}
}

View File

@ -1,17 +1,23 @@
from Standard.Base import Nothing, File, Illegal_Argument_Error, True, False
import Standard.Base.System.File.Existing_File_Behavior
from Standard.Base.System.File import File_Already_Exists_Error
import Standard.Base.Data.Time.Date
import Standard.Table.Io.File_Read
import Standard.Table.Io.File_Format
import Standard.Table.Data.Column_Mapping
from Standard.Table.Data.Column_Selector as Column_Selector_Module import By_Index
from Standard.Table.Io.Excel import Excel_Range, Sheet_Names, Range_Names, Sheet, Cell_Range
from Standard.Table.Error as Table_Errors import Invalid_Output_Column_Names, Duplicate_Output_Column_Names
from Standard.Table.Error as Table_Errors import Invalid_Output_Column_Names, Duplicate_Output_Column_Names, Invalid_Location, Range_Exceeded, Existing_Data
from Standard.Table.Data.Data_Formatter as Data_Formatter_Module import Data_Formatter
import Standard.Test
import Standard.Test.Problems
import Standard.Examples
import project.Util
spec_fmt header file read_method =
Test.group header <|
Test.specify "should read the specified sheet by index and use correct headers" <|
@ -61,6 +67,96 @@ spec_fmt header file read_method =
t_3.at 'B' . to_vector . should_equal [Nothing, 30]
t_3.at 'C' . to_vector . should_equal [43.2, 54]
# Registers the Excel write tests for one file flavour.
# `suffix` is used both in the group name and as the extension of the output file `out`.
# `test_sheet_name` is the workbook in the project's data directory that the tests working on
# an existing file copy to `out` before writing.
spec_write suffix test_sheet_name =
Test.group ("Write " + suffix + " Files") <|
## We disable parsing dates, because date support is not advanced enough
for these tests. This should ideally be re-enabled with the
completion of the following story:
https://www.pivotaltracker.com/story/show/181755990
no_dates = File_Format.Delimited "," value_formatter=(Data_Formatter date_formats=[] time_formats=[] datetime_formats=[])
out = Enso_Project.data / ('out.' + suffix)
table = Enso_Project.data/'varied_column.csv' . read (format = no_dates)
clothes = Enso_Project.data/'clothes.csv' . read (format = no_dates)
# Only the first two columns of `clothes`; used by the range tests below.
sub_clothes = clothes.select_columns (By_Index [0, 1])
Test.specify 'should write a table to non-existent file as a new sheet with headers' <|
out.delete_if_exists
table.write out
written = out.read
written.should_equal table
out.delete_if_exists
Test.specify 'should create new sheets at the start if index is 0' <|
out.delete_if_exists
table.write out (File_Format.Excel (Sheet 0))
clothes.write out (File_Format.Excel (Sheet 0))
read_1 = out.read (File_Format.Excel (Sheet "Sheet1"))
read_1 . should_equal table
read_2 = out.read (File_Format.Excel (Sheet "Sheet2"))
read_2 . should_equal clothes
# The second write also used index 0, so "Sheet2" is expected ahead of "Sheet1".
read_3 = out.read (File_Format.Excel (Sheet_Names))
read_3 . should_equal ["Sheet2", "Sheet1"]
out.delete_if_exists
Test.specify 'should write a table to specific single cell location of an existing sheet' <|
out.delete_if_exists
(Enso_Project.data / test_sheet_name) . copy_to out
table.write out (File_Format.Excel (Cell_Range "Another!G1"))
written = out.read (File_Format.Excel (Cell_Range "Another!G1"))
written.should_equal table
out.delete_if_exists
Test.specify 'should clear out an existing fixed range and replace' <|
out.delete_if_exists
(Enso_Project.data / test_sheet_name) . copy_to out
sub_clothes.write out (File_Format.Excel (Cell_Range "Another!A1:D20"))
written = out.read (File_Format.Excel (Cell_Range "Another!A1"))
written.should_equal sub_clothes
out.delete_if_exists
Test.specify 'should clear out an existing range and replace' <|
out.delete_if_exists
(Enso_Project.data / test_sheet_name) . copy_to out
sub_clothes.write out (File_Format.Excel (Cell_Range "Another!A1"))
written = out.read (File_Format.Excel (Cell_Range "Another!A1"))
written.should_equal sub_clothes
out.delete_if_exists
Test.specify 'should result in Invalid_Location error if trying to write in a bad location' <|
out.delete_if_exists
(Enso_Project.data / test_sheet_name) . copy_to out
sub_clothes.write out (File_Format.Excel (Cell_Range "DoesNotExist!A1")) . should_fail_with Invalid_Location
sub_clothes.write out (File_Format.Excel (Cell_Range "DoesNotExist!A1:B2")) . should_fail_with Invalid_Location
sub_clothes.write out (File_Format.Excel (Cell_Range "SillyRangeName")) . should_fail_with Invalid_Location
out.delete_if_exists
Test.specify 'should result in Range_Exceeded error if trying to write in too small a range' <|
out.delete_if_exists
(Enso_Project.data / test_sheet_name) . copy_to out
sub_clothes.write out (File_Format.Excel (Cell_Range "Another!A1:B2")) . should_fail_with Range_Exceeded
out.delete_if_exists
Test.specify 'should result in Existing_Data error if in Error mode and trying to replace' <|
out.delete_if_exists
(Enso_Project.data / test_sheet_name) . copy_to out
sub_clothes.write out (File_Format.Excel (Sheet 1)) on_existing_file=Existing_File_Behavior.Error . should_fail_with Existing_Data
sub_clothes.write out (File_Format.Excel (Sheet "Another")) on_existing_file=Existing_File_Behavior.Error . should_fail_with Existing_Data
sub_clothes.write out (File_Format.Excel (Cell_Range "Another!A1")) on_existing_file=Existing_File_Behavior.Error . should_fail_with Existing_Data
sub_clothes.write out (File_Format.Excel (Cell_Range "Sheet1!A9")) on_existing_file=Existing_File_Behavior.Error . should_fail_with Existing_Data
out.delete_if_exists
# NOTE(review): unlike the other tests, this one does not call `out.delete_if_exists` when it
# finishes; the following test deletes `out` before writing, so the leftover is only cleaned up there.
Test.specify 'should not allow adding a new sheet if in Error mode and not clashing' <|
out.delete_if_exists
(Enso_Project.data / test_sheet_name) . copy_to out
sub_clothes.write out (File_Format.Excel (Sheet "Testing")) on_existing_file=Existing_File_Behavior.Error . should_fail_with File_Already_Exists_Error
Test.specify 'should write a table to non-existent file as a new sheet without headers' <|
out.delete_if_exists
table.write out (File_Format.Excel (Sheet "Sheet1") headers=False)
written = out.read
# Without a header row the file is read back with the default Excel column names.
written.should_equal (table.rename_columns (Column_Mapping.By_Position ['A', 'B', 'C', 'D', 'E', 'F']))
out.delete_if_exists
spec =
Test.group 'Excel Range' <|
check_range excel_range sheet_name tlbr_vector single_cell=False =
@ -321,4 +417,8 @@ spec =
problems = [Duplicate_Output_Column_Names ["DD"]]
Problems.test_problem_handling action problems tester
here.spec_write "xlsx" 'TestSheet.xlsx'
here.spec_write "xls" 'TestSheetOld.xls'
main = Test.Suite.run_main here.spec

View File

@ -10,7 +10,6 @@ import project.Delimited_Write_Spec
import project.Excel_Spec
import project.Json_Spec
import project.Table_Spec
import project.Spreadsheet_Spec
import project.Aggregate_Column_Spec
import project.Aggregate_Spec
@ -21,7 +20,6 @@ in_memory_spec =
Delimited_Write_Spec.spec
Excel_Spec.spec
Json_Spec.spec
Spreadsheet_Spec.spec
Table_Spec.spec
Model_Spec.spec
Aggregate_Column_Spec.spec

View File

@ -1,68 +0,0 @@
from Standard.Base import all
import Standard.Test
import Standard.Table
import Standard.Table.Io.File_Format
import Standard.Table.Io.File_Read
from Standard.Table.Io.Excel import Sheet
from Standard.Table.Data.Data_Formatter as Data_Formatter_Module import Data_Formatter
import project.Util
spec =
    Test.group 'XLSX writing' <|
        fixtures = Enso_Project.data
        ## Date parsing is turned off for the CSV fixtures, because date
           support is not advanced enough for these tests. This should ideally
           be re-enabled with the completion of the following story:
           https://www.pivotaltracker.com/story/show/181755990
        plain_values = Data_Formatter date_formats=[] time_formats=[] datetime_formats=[]
        dateless_csv = File_Format.Delimited "," value_formatter=plain_values
        # Reads one of the CSV fixtures using the date-free format above.
        load_csv name = (fixtures / name) . read (format = dateless_csv)
        workbook = fixtures / 'out.xlsx'
        varied = load_csv 'varied_column.csv'
        garments = load_csv 'clothes.csv'
        # Reads a single named sheet back from the output workbook.
        read_sheet name = workbook.read (File_Format.Excel (Sheet name))

        Test.specify 'should write tables to non-existent XLSX files' <|
            workbook.delete_if_exists
            varied.write_xlsx workbook
            workbook.read . should_equal varied
            workbook.delete_if_exists

        Test.specify 'should create a new sheet if it already exists and write mode is Create' <|
            workbook.delete_if_exists
            varied.write_xlsx workbook sheet='Foo'
            garments.write_xlsx workbook sheet='Foo'
            # The first write is expected under the requested name, the second
            # under the name 'Foo 1'.
            read_sheet "Foo" . should_equal varied
            read_sheet "Foo 1" . should_equal garments
            workbook.delete_if_exists

        Test.specify 'should overwrite a sheet if it already exists and write mode is Overwrite' <|
            workbook.delete_if_exists
            varied.write_xlsx workbook sheet='Foo'
            garments.write_xlsx workbook sheet='Foo' write_mode=Table.Spreadsheet_Write_Mode.Overwrite
            read_sheet "Foo" . should_equal garments
            workbook.delete_if_exists

        Test.specify 'should append to a sheet if it already exists and write mode is Append' <|
            workbook.delete_if_exists
            garments.write_xlsx workbook sheet='Foo'
            garments.write_xlsx workbook sheet='Foo' write_mode=Table.Spreadsheet_Write_Mode.Append include_header=False
            doubled = garments.concat garments
            read_sheet "Foo" . should_equal doubled
            workbook.delete_if_exists

        Test.specify 'should write multiple files if row limit is specified' <|
            first_part = fixtures / 'out_1.xlsx'
            second_part = fixtures / 'out_2.xlsx'
            first_part.delete_if_exists
            second_part.delete_if_exists
            # The write targets 'out.xlsx'; the assertions read 'out_1.xlsx'
            # and 'out_2.xlsx', expecting the first 4 and the last 2 rows.
            garments.write_xlsx workbook max_rows_per_file=4
            first_part.read . should_equal (garments.take_start 4)
            second_part.read . should_equal (garments.take_end 2)
            first_part.delete_if_exists
            second_part.delete_if_exists
# Entry point for running this file on its own: passes this module's `spec`
# to `Test.Suite.run_main`.
main = Test.Suite.run_main here.spec