mirror of
https://github.com/enso-org/enso.git
synced 2024-12-24 00:42:14 +03:00
Excel Reading (XLSX) using SAX Default Handler (#10877)
- Implement Excel reading as a SAXMLParser.
This commit is contained in:
parent
e6bcd5e485
commit
63ed629210
@ -49,7 +49,7 @@ type Excel_Workbook
|
|||||||
- file: The file to load.
|
- file: The file to load.
|
||||||
- xls_format: Whether to use the old XLS format (default is XLSX).
|
- xls_format: Whether to use the old XLS format (default is XLSX).
|
||||||
new : File | Temporary_File -> Boolean -> Excel_Workbook
|
new : File | Temporary_File -> Boolean -> Excel_Workbook
|
||||||
new file:(File | Temporary_File) xls_format=False =
|
new file:(File | Temporary_File) xls_format:Boolean=False =
|
||||||
file_for_errors = if file.is_a Temporary_File then Nothing else file
|
file_for_errors = if file.is_a Temporary_File then Nothing else file
|
||||||
|
|
||||||
continuation raw_file =
|
continuation raw_file =
|
||||||
@ -73,7 +73,7 @@ type Excel_Workbook
|
|||||||
- xls_format: Whether to use the old XLS format (default is XLSX).
|
- xls_format: Whether to use the old XLS format (default is XLSX).
|
||||||
- file: Optional file reference.
|
- file: Optional file reference.
|
||||||
from_stream : Input_Stream -> Boolean -> File | Nothing -> Excel_Workbook
|
from_stream : Input_Stream -> Boolean -> File | Nothing -> Excel_Workbook
|
||||||
from_stream stream xls_format=False file=Nothing = Excel_Reader.handle_bad_format file <|
|
from_stream stream xls_format:Boolean=False file=Nothing = Excel_Reader.handle_bad_format file <|
|
||||||
temp_file = Temporary_File.from_stream_light stream
|
temp_file = Temporary_File.from_stream_light stream
|
||||||
Excel_Workbook.new temp_file xls_format
|
Excel_Workbook.new temp_file xls_format
|
||||||
|
|
||||||
@ -89,8 +89,8 @@ type Excel_Workbook
|
|||||||
## PRIVATE
|
## PRIVATE
|
||||||
ICON metadata
|
ICON metadata
|
||||||
Returns the list of databases (or catalogs) for the connection.
|
Returns the list of databases (or catalogs) for the connection.
|
||||||
databases : Nothing
|
databases : Vector (Text | Nothing)
|
||||||
databases self = Nothing
|
databases self = [Nothing]
|
||||||
|
|
||||||
## PRIVATE
|
## PRIVATE
|
||||||
ICON metadata
|
ICON metadata
|
||||||
@ -109,7 +109,7 @@ type Excel_Workbook
|
|||||||
Arguments:
|
Arguments:
|
||||||
- database: The target file to open as an Excel_Workbook.
|
- database: The target file to open as an Excel_Workbook.
|
||||||
set_database : Text | File -> Excel_Workbook ! Illegal_Argument
|
set_database : Text | File -> Excel_Workbook ! Illegal_Argument
|
||||||
set_database self database =
|
set_database self database:(Text | File) =
|
||||||
if database == self.database then self else
|
if database == self.database then self else
|
||||||
file = File.new database
|
file = File.new database
|
||||||
if file.exists && file.is_directory.not then Excel_Workbook.new file self.xls_format else
|
if file.exists && file.is_directory.not then Excel_Workbook.new file self.xls_format else
|
||||||
@ -163,7 +163,7 @@ type Excel_Workbook
|
|||||||
Gets the names of all the named ranges.
|
Gets the names of all the named ranges.
|
||||||
named_ranges : Vector Text
|
named_ranges : Vector Text
|
||||||
named_ranges self = self.with_java_workbook java_workbook->
|
named_ranges self = self.with_java_workbook java_workbook->
|
||||||
Vector.from_polyglot_array (ExcelReader.readRangeNames java_workbook)
|
Vector.from_polyglot_array java_workbook.getRangeNames
|
||||||
|
|
||||||
## PRIVATE
|
## PRIVATE
|
||||||
ICON metadata
|
ICON metadata
|
||||||
|
@ -20,6 +20,7 @@ import org.apache.poi.openxml4j.opc.PackageAccess;
|
|||||||
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
||||||
import org.apache.poi.ss.usermodel.Workbook;
|
import org.apache.poi.ss.usermodel.Workbook;
|
||||||
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
|
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
|
||||||
|
import org.enso.table.excel.xssfreader.XSSFReaderWorkbook;
|
||||||
|
|
||||||
public class ExcelConnectionPool {
|
public class ExcelConnectionPool {
|
||||||
public static final ExcelConnectionPool INSTANCE = new ExcelConnectionPool();
|
public static final ExcelConnectionPool INSTANCE = new ExcelConnectionPool();
|
||||||
@ -64,7 +65,7 @@ public class ExcelConnectionPool {
|
|||||||
record.refCount = 1;
|
record.refCount = 1;
|
||||||
record.file = file;
|
record.file = file;
|
||||||
record.format = format;
|
record.format = format;
|
||||||
record.workbook = openWorkbook(file, format, false);
|
record.reopen(true);
|
||||||
records.put(key, record);
|
records.put(key, record);
|
||||||
return new ReadOnlyExcelConnection(this, key, record);
|
return new ReadOnlyExcelConnection(this, key, record);
|
||||||
}
|
}
|
||||||
@ -212,10 +213,10 @@ public class ExcelConnectionPool {
|
|||||||
private int refCount;
|
private int refCount;
|
||||||
private File file;
|
private File file;
|
||||||
private ExcelFileFormat format;
|
private ExcelFileFormat format;
|
||||||
private Workbook workbook;
|
private ExcelWorkbook workbook;
|
||||||
private IOException initializationException = null;
|
private IOException initializationException = null;
|
||||||
|
|
||||||
<T> T withWorkbook(Function<Workbook, T> action) throws IOException {
|
<T> T withWorkbook(Function<ExcelWorkbook, T> action) throws IOException {
|
||||||
synchronized (this) {
|
synchronized (this) {
|
||||||
return action.apply(accessCurrentWorkbook());
|
return action.apply(accessCurrentWorkbook());
|
||||||
}
|
}
|
||||||
@ -238,7 +239,10 @@ public class ExcelConnectionPool {
|
|||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
workbook = openWorkbook(file, format, false);
|
workbook =
|
||||||
|
format == ExcelFileFormat.XLSX
|
||||||
|
? new XSSFReaderWorkbook(file.getAbsolutePath())
|
||||||
|
: ExcelWorkbook.forPOIUserModel(openWorkbook(file, format, false));
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
initializationException = e;
|
initializationException = e;
|
||||||
if (throwOnFailure) {
|
if (throwOnFailure) {
|
||||||
@ -248,7 +252,7 @@ public class ExcelConnectionPool {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private Workbook accessCurrentWorkbook() throws IOException {
|
private ExcelWorkbook accessCurrentWorkbook() throws IOException {
|
||||||
synchronized (this) {
|
synchronized (this) {
|
||||||
if (workbook == null) {
|
if (workbook == null) {
|
||||||
if (initializationException != null) {
|
if (initializationException != null) {
|
||||||
@ -278,7 +282,7 @@ public class ExcelConnectionPool {
|
|||||||
throw e;
|
throw e;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
case XLSX -> {
|
case XLSX, XLSX_FALLBACK -> {
|
||||||
try {
|
try {
|
||||||
PackageAccess access = writeAccess ? PackageAccess.READ_WRITE : PackageAccess.READ;
|
PackageAccess access = writeAccess ? PackageAccess.READ_WRITE : PackageAccess.READ;
|
||||||
OPCPackage pkg = OPCPackage.open(file, access);
|
OPCPackage pkg = OPCPackage.open(file, access);
|
||||||
@ -300,7 +304,7 @@ public class ExcelConnectionPool {
|
|||||||
private static Workbook createEmptyWorkbook(ExcelFileFormat format) {
|
private static Workbook createEmptyWorkbook(ExcelFileFormat format) {
|
||||||
return switch (format) {
|
return switch (format) {
|
||||||
case XLS -> new HSSFWorkbook();
|
case XLS -> new HSSFWorkbook();
|
||||||
case XLSX -> new XSSFWorkbook();
|
case XLSX, XLSX_FALLBACK -> new XSSFWorkbook();
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2,5 +2,6 @@ package org.enso.table.excel;
|
|||||||
|
|
||||||
public enum ExcelFileFormat {
|
public enum ExcelFileFormat {
|
||||||
XLS,
|
XLS,
|
||||||
XLSX
|
XLSX,
|
||||||
|
XLSX_FALLBACK
|
||||||
}
|
}
|
||||||
|
@ -57,7 +57,7 @@ public class ExcelHeaders {
|
|||||||
|
|
||||||
String[] output = new String[currentEndCol - startCol + 1];
|
String[] output = new String[currentEndCol - startCol + 1];
|
||||||
for (int col = startCol; col <= currentEndCol; col++) {
|
for (int col = startCol; col <= currentEndCol; col++) {
|
||||||
String cellText = row.getFormattedCell(col);
|
String cellText = row.getCellText(col);
|
||||||
String name = cellText.isEmpty() ? "" : deduplicator.makeUnique(cellText);
|
String name = cellText.isEmpty() ? "" : deduplicator.makeUnique(cellText);
|
||||||
|
|
||||||
output[col - startCol] = name;
|
output[col - startCol] = name;
|
||||||
|
@ -197,7 +197,7 @@ public class ExcelRange {
|
|||||||
|
|
||||||
Context context = Context.getCurrent();
|
Context context = Context.getCurrent();
|
||||||
while (currentRow != null && !currentRow.isEmpty(excelRange.getLeftColumn(), rightColumn)) {
|
while (currentRow != null && !currentRow.isEmpty(excelRange.getLeftColumn(), rightColumn)) {
|
||||||
rightColumn = currentRow.findEndRight(rightColumn);
|
rightColumn = findEndRight(currentRow, rightColumn);
|
||||||
bottomRow++;
|
bottomRow++;
|
||||||
currentRow = sheet.get(bottomRow);
|
currentRow = sheet.get(bottomRow);
|
||||||
|
|
||||||
@ -212,6 +212,16 @@ public class ExcelRange {
|
|||||||
bottomRow - 1);
|
bottomRow - 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static int findEndRight(ExcelRow row, int start) {
|
||||||
|
Context context = Context.getCurrent();
|
||||||
|
int column = start;
|
||||||
|
while (!row.isEmpty(column + 1)) {
|
||||||
|
column++;
|
||||||
|
context.safepoint();
|
||||||
|
}
|
||||||
|
return column;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param index The index to the next character after the parsed value
|
* @param index The index to the next character after the parsed value
|
||||||
* @param value Parsed integer value or 0 if not valid
|
* @param value Parsed integer value or 0 if not valid
|
||||||
|
@ -10,114 +10,51 @@ import org.apache.poi.ss.usermodel.DateUtil;
|
|||||||
import org.apache.poi.ss.usermodel.ExcelNumberFormat;
|
import org.apache.poi.ss.usermodel.ExcelNumberFormat;
|
||||||
import org.apache.poi.ss.usermodel.FormulaError;
|
import org.apache.poi.ss.usermodel.FormulaError;
|
||||||
import org.apache.poi.ss.usermodel.Row;
|
import org.apache.poi.ss.usermodel.Row;
|
||||||
|
import org.apache.poi.ss.usermodel.Sheet;
|
||||||
import org.graalvm.polyglot.Context;
|
import org.graalvm.polyglot.Context;
|
||||||
|
|
||||||
/** Wrapper class to handle Excel rows. */
|
/** Wrapper class to handle Excel rows. */
|
||||||
public class ExcelRow {
|
public interface ExcelRow {
|
||||||
private static final DataFormatter formatter = new DataFormatter();
|
/** Gets the initial column index within the row (1-based). */
|
||||||
|
int getFirstColumn();
|
||||||
|
|
||||||
private final Row row;
|
/** Gets the final column index within the row (1-based). */
|
||||||
private final int firstColumn;
|
int getLastColumn();
|
||||||
private final int lastColumn;
|
|
||||||
private final boolean use1904Format;
|
|
||||||
|
|
||||||
public ExcelRow(Row row, boolean use1904Format) {
|
/** Gets the value of the cell at the given index within the row (1-based). */
|
||||||
this.row = row;
|
Object getCellValue(int column);
|
||||||
this.firstColumn = row.getFirstCellNum() + 1;
|
|
||||||
this.lastColumn = row.getLastCellNum();
|
/** Gets the text of a cell at the given index within the row (1-based). */
|
||||||
this.use1904Format = use1904Format;
|
String getCellText(int column);
|
||||||
|
|
||||||
|
/** Gets the cell at the given index within the row (1-based). */
|
||||||
|
Cell get(int column);
|
||||||
|
|
||||||
|
/** Checks if the specified cell is empty. */
|
||||||
|
boolean isEmpty(int column);
|
||||||
|
|
||||||
|
/** Checks if the specified set of cells are empty. */
|
||||||
|
boolean isEmpty(int start, int end);
|
||||||
|
|
||||||
|
/** Gets the cells as text. */
|
||||||
|
String[] getCellsAsText(int startCol, int endCol);
|
||||||
|
|
||||||
|
/** Creates an ExcelRow for the given row index (1-based) of an Apache POI Sheet, or null if the row does not exist. */
|
||||||
|
static ExcelRow forPOIUserModel(Sheet sheet, int rowIndex, boolean use1904Format) {
|
||||||
|
var row = sheet.getRow(rowIndex - 1);
|
||||||
|
return row == null
|
||||||
|
? null
|
||||||
|
: new ExcelRowFromPOIUserModel(
|
||||||
|
row, row.getFirstCellNum() + 1, row.getLastCellNum(), use1904Format);
|
||||||
}
|
}
|
||||||
|
|
||||||
public int getFirstColumn() {
|
static boolean isEmptyHelper(ExcelRow row, int start, int end) {
|
||||||
return firstColumn;
|
|
||||||
}
|
|
||||||
|
|
||||||
public int getLastColumn() {
|
|
||||||
return lastColumn;
|
|
||||||
}
|
|
||||||
|
|
||||||
public Cell get(int column) {
|
|
||||||
return (column < firstColumn || column > lastColumn) ? null : row.getCell(column - 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
public Object getCellValue(int column) {
|
|
||||||
Cell cell = get(column);
|
|
||||||
CellType cellType = getCellType(cell);
|
|
||||||
switch (cellType) {
|
|
||||||
case NUMERIC:
|
|
||||||
double dblValue = cell.getNumericCellValue();
|
|
||||||
var nf = ExcelNumberFormat.from(cell, null);
|
|
||||||
if (nf != null && DateUtil.isADateFormat(nf.getIdx(), nf.getFormat())) {
|
|
||||||
var temporal =
|
|
||||||
use1904Format
|
|
||||||
? ExcelUtils.fromExcelDateTime1904(dblValue)
|
|
||||||
: ExcelUtils.fromExcelDateTime(dblValue);
|
|
||||||
|
|
||||||
if (temporal == null) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
return switch (temporal) {
|
|
||||||
case LocalDate date -> {
|
|
||||||
var dateFormat = cell.getCellStyle().getDataFormatString();
|
|
||||||
yield (dateFormat.contains("h") || dateFormat.contains("H"))
|
|
||||||
? date.atStartOfDay(ZoneId.systemDefault())
|
|
||||||
: date;
|
|
||||||
}
|
|
||||||
case ZonedDateTime zdt -> {
|
|
||||||
if (!use1904Format || zdt.getYear() != 1904 || zdt.getDayOfYear() != 1) {
|
|
||||||
yield temporal;
|
|
||||||
}
|
|
||||||
var dateFormat = cell.getCellStyle().getDataFormatString();
|
|
||||||
yield (dateFormat.contains("y")
|
|
||||||
|| dateFormat.contains("M")
|
|
||||||
|| dateFormat.contains("d"))
|
|
||||||
? zdt
|
|
||||||
: zdt.toLocalTime();
|
|
||||||
}
|
|
||||||
default -> temporal;
|
|
||||||
};
|
|
||||||
} else {
|
|
||||||
if (dblValue == (long) dblValue) {
|
|
||||||
return (long) dblValue;
|
|
||||||
} else {
|
|
||||||
return dblValue;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
case STRING:
|
|
||||||
return cell.getStringCellValue();
|
|
||||||
case BOOLEAN:
|
|
||||||
return cell.getBooleanCellValue();
|
|
||||||
default:
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public static CellType getCellType(Cell cell) {
|
|
||||||
if (cell == null) {
|
|
||||||
return CellType._NONE;
|
|
||||||
}
|
|
||||||
|
|
||||||
CellType cellType = cell.getCellType();
|
|
||||||
if (cellType == CellType.FORMULA) {
|
|
||||||
cellType = cell.getCachedFormulaResultType();
|
|
||||||
}
|
|
||||||
|
|
||||||
return cellType;
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean isEmpty(int column) {
|
|
||||||
CellType cellType = getCellType(get(column));
|
|
||||||
return (cellType == CellType._NONE) || (cellType == CellType.BLANK);
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean isEmpty(int start, int end) {
|
|
||||||
Context context = Context.getCurrent();
|
Context context = Context.getCurrent();
|
||||||
int currentEnd = end == -1 ? getLastColumn() : end;
|
int currentEnd = end == -1 ? row.getLastColumn() : end;
|
||||||
for (int column = Math.max(getFirstColumn(), start);
|
for (int column = Math.max(row.getFirstColumn(), start);
|
||||||
column <= Math.min(getLastColumn(), currentEnd);
|
column <= Math.min(row.getLastColumn(), currentEnd);
|
||||||
column++) {
|
column++) {
|
||||||
if (!isEmpty(column)) {
|
if (!row.isEmpty(column)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -126,63 +63,144 @@ public class ExcelRow {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
public int findEndRight(int start) {
|
record ExcelRowFromPOIUserModel(Row row, int firstColumn, int lastColumn, boolean use1904Format)
|
||||||
Context context = Context.getCurrent();
|
implements ExcelRow {
|
||||||
int column = start;
|
private static final DataFormatter formatter = new DataFormatter();
|
||||||
while (!isEmpty(column + 1)) {
|
|
||||||
column++;
|
|
||||||
context.safepoint();
|
|
||||||
}
|
|
||||||
return column;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Returns the formatted cell value. */
|
public int getFirstColumn() {
|
||||||
public String getFormattedCell(int col) {
|
return firstColumn;
|
||||||
var cell = get(col);
|
|
||||||
if (cell == null) {
|
|
||||||
return "";
|
|
||||||
}
|
}
|
||||||
|
|
||||||
var rawCellType = cell.getCellType();
|
public int getLastColumn() {
|
||||||
var cellType =
|
return lastColumn;
|
||||||
rawCellType == CellType.FORMULA ? cell.getCachedFormulaResultType() : rawCellType;
|
|
||||||
|
|
||||||
return switch (cellType) {
|
|
||||||
case ERROR ->
|
|
||||||
// Want to show the error message rather than empty.
|
|
||||||
FormulaError.forInt(cell.getErrorCellValue()).getString();
|
|
||||||
case NUMERIC -> {
|
|
||||||
// Special handling for Number or Date cells as want to keep formatting.
|
|
||||||
var format = ExcelNumberFormat.from(cell, null);
|
|
||||||
var value = cell.getNumericCellValue();
|
|
||||||
yield format == null
|
|
||||||
? Double.toString(value)
|
|
||||||
: formatter.formatRawCellContents(value, format.getIdx(), format.getFormat());
|
|
||||||
}
|
|
||||||
default -> {
|
|
||||||
// Use the default read and then toString.
|
|
||||||
var value = getCellValue(col);
|
|
||||||
yield value == null ? "" : value.toString();
|
|
||||||
}
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
public String[] getCellsAsText(int startCol, int endCol) {
|
|
||||||
Context context = Context.getCurrent();
|
|
||||||
int currentEndCol = endCol == -1 ? getLastColumn() : endCol;
|
|
||||||
|
|
||||||
String[] output = new String[currentEndCol - startCol + 1];
|
|
||||||
for (int col = startCol; col <= currentEndCol; col++) {
|
|
||||||
Cell cell = get(col);
|
|
||||||
CellType type = ExcelRow.getCellType(cell);
|
|
||||||
if (type != CellType._NONE && type != CellType.BLANK && type != CellType.STRING) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
output[col - startCol] =
|
|
||||||
type == CellType.STRING && cell != null ? cell.getStringCellValue() : "";
|
|
||||||
context.safepoint();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return output;
|
public Cell get(int column) {
|
||||||
|
return (column < firstColumn || column > lastColumn) ? null : row.getCell(column - 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
public Object getCellValue(int column) {
|
||||||
|
Cell cell = get(column);
|
||||||
|
CellType cellType = getCellType(cell);
|
||||||
|
switch (cellType) {
|
||||||
|
case NUMERIC:
|
||||||
|
double dblValue = cell.getNumericCellValue();
|
||||||
|
var nf = ExcelNumberFormat.from(cell, null);
|
||||||
|
if (nf != null && DateUtil.isADateFormat(nf.getIdx(), nf.getFormat())) {
|
||||||
|
var temporal =
|
||||||
|
use1904Format
|
||||||
|
? ExcelUtils.fromExcelDateTime1904(dblValue)
|
||||||
|
: ExcelUtils.fromExcelDateTime(dblValue);
|
||||||
|
|
||||||
|
if (temporal == null) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
return switch (temporal) {
|
||||||
|
case LocalDate date -> {
|
||||||
|
var dateFormat = cell.getCellStyle().getDataFormatString();
|
||||||
|
yield (dateFormat.contains("h") || dateFormat.contains("H"))
|
||||||
|
? date.atStartOfDay(ZoneId.systemDefault())
|
||||||
|
: date;
|
||||||
|
}
|
||||||
|
case ZonedDateTime zdt -> {
|
||||||
|
if (!use1904Format || zdt.getYear() != 1904 || zdt.getDayOfYear() != 1) {
|
||||||
|
yield temporal;
|
||||||
|
}
|
||||||
|
var dateFormat = cell.getCellStyle().getDataFormatString();
|
||||||
|
yield (dateFormat.contains("y")
|
||||||
|
|| dateFormat.contains("M")
|
||||||
|
|| dateFormat.contains("d"))
|
||||||
|
? zdt
|
||||||
|
: zdt.toLocalTime();
|
||||||
|
}
|
||||||
|
default -> temporal;
|
||||||
|
};
|
||||||
|
} else {
|
||||||
|
if (dblValue == (long) dblValue) {
|
||||||
|
return (long) dblValue;
|
||||||
|
} else {
|
||||||
|
return dblValue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
case STRING:
|
||||||
|
return cell.getStringCellValue();
|
||||||
|
case BOOLEAN:
|
||||||
|
return cell.getBooleanCellValue();
|
||||||
|
default:
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getCellText(int column) {
|
||||||
|
var cell = get(column);
|
||||||
|
if (cell == null) {
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
|
||||||
|
var rawCellType = cell.getCellType();
|
||||||
|
var cellType =
|
||||||
|
rawCellType == CellType.FORMULA ? cell.getCachedFormulaResultType() : rawCellType;
|
||||||
|
|
||||||
|
return switch (cellType) {
|
||||||
|
case ERROR ->
|
||||||
|
// Want to show the error message rather than empty.
|
||||||
|
FormulaError.forInt(cell.getErrorCellValue()).getString();
|
||||||
|
case NUMERIC -> {
|
||||||
|
// Special handling for Number or Date cells as want to keep formatting.
|
||||||
|
var format = ExcelNumberFormat.from(cell, null);
|
||||||
|
var value = cell.getNumericCellValue();
|
||||||
|
yield format == null
|
||||||
|
? Double.toString(value)
|
||||||
|
: formatter.formatRawCellContents(value, format.getIdx(), format.getFormat());
|
||||||
|
}
|
||||||
|
default -> {
|
||||||
|
// Use the default read and then toString.
|
||||||
|
var value = getCellValue(column);
|
||||||
|
yield value == null ? "" : value.toString();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isEmpty(int column) {
|
||||||
|
CellType cellType = getCellType(get(column));
|
||||||
|
return (cellType == CellType._NONE) || (cellType == CellType.BLANK);
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isEmpty(int start, int end) {
|
||||||
|
return isEmptyHelper(this, start, end);
|
||||||
|
}
|
||||||
|
|
||||||
|
public String[] getCellsAsText(int startCol, int endCol) {
|
||||||
|
Context context = Context.getCurrent();
|
||||||
|
int currentEndCol = endCol == -1 ? getLastColumn() : endCol;
|
||||||
|
|
||||||
|
String[] output = new String[currentEndCol - startCol + 1];
|
||||||
|
for (int col = startCol; col <= currentEndCol; col++) {
|
||||||
|
Cell cell = get(col);
|
||||||
|
CellType type = getCellType(cell);
|
||||||
|
if (type != CellType._NONE && type != CellType.BLANK && type != CellType.STRING) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
output[col - startCol] =
|
||||||
|
type == CellType.STRING && cell != null ? cell.getStringCellValue() : "";
|
||||||
|
context.safepoint();
|
||||||
|
}
|
||||||
|
|
||||||
|
return output;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static CellType getCellType(Cell cell) {
|
||||||
|
if (cell == null) {
|
||||||
|
return CellType._NONE;
|
||||||
|
}
|
||||||
|
|
||||||
|
CellType cellType = cell.getCellType();
|
||||||
|
if (cellType == CellType.FORMULA) {
|
||||||
|
cellType = cell.getCachedFormulaResultType();
|
||||||
|
}
|
||||||
|
|
||||||
|
return cellType;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,37 +1,83 @@
|
|||||||
package org.enso.table.excel;
|
package org.enso.table.excel;
|
||||||
|
|
||||||
import org.apache.poi.ss.usermodel.Row;
|
|
||||||
import org.apache.poi.ss.usermodel.Sheet;
|
import org.apache.poi.ss.usermodel.Sheet;
|
||||||
import org.apache.poi.ss.usermodel.Workbook;
|
import org.apache.poi.ss.usermodel.Workbook;
|
||||||
|
|
||||||
/** Wrapper class to handle Excel sheets. */
|
/** Wrapper class to handle Excel sheets. */
|
||||||
public class ExcelSheet {
|
public interface ExcelSheet {
|
||||||
private final Sheet sheet;
|
/** Gets the index of the sheet within the workbook (0-based). */
|
||||||
private final int firstRow;
|
int getSheetIndex();
|
||||||
private final int lastRow;
|
|
||||||
private final boolean use1904Format;
|
|
||||||
|
|
||||||
public ExcelSheet(Workbook workbook, int sheetIndex) {
|
/** Gets the name of the sheet. */
|
||||||
this.sheet = workbook.getSheetAt(sheetIndex);
|
String getName();
|
||||||
this.firstRow = sheet.getFirstRowNum() + 1;
|
|
||||||
this.lastRow = sheet.getLastRowNum() + 1;
|
/** Gets the initial row index within the sheet (1-based). */
|
||||||
this.use1904Format = ExcelUtils.is1904DateSystem(workbook);
|
int getFirstRow();
|
||||||
|
|
||||||
|
/** Gets the final row index within the sheet (1-based). */
|
||||||
|
int getLastRow();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the row at the given index within the sheet (1-based)
|
||||||
|
*
|
||||||
|
* @param row the row index (1-based).
|
||||||
|
* @return the row object or null if the row index is out of range or doesn't exist.
|
||||||
|
*/
|
||||||
|
ExcelRow get(int row);
|
||||||
|
|
||||||
|
/** Gets the underlying Apache POI Sheet object - may be null. Provided for Writer use only. */
|
||||||
|
Sheet getSheet();
|
||||||
|
|
||||||
|
/** Creates an ExcelSheet wrapping the sheet at the given index (0-based) of an Apache POI Workbook. */
|
||||||
|
static ExcelSheet forPOIUserModel(Workbook workbook, int sheetIndex) {
|
||||||
|
var sheet = workbook.getSheetAt(sheetIndex);
|
||||||
|
return new ExcelSheetFromPOIUserModel(
|
||||||
|
sheet,
|
||||||
|
sheetIndex,
|
||||||
|
sheet.getSheetName(),
|
||||||
|
sheet.getFirstRowNum() + 1,
|
||||||
|
sheet.getLastRowNum() + 1,
|
||||||
|
ExcelUtils.is1904DateSystem(workbook));
|
||||||
}
|
}
|
||||||
|
|
||||||
public int getLastRow() {
|
record ExcelSheetFromPOIUserModel(
|
||||||
return lastRow;
|
Sheet sheet,
|
||||||
}
|
int sheetIndex,
|
||||||
|
String sheetName,
|
||||||
|
int firstRow,
|
||||||
|
int lastRow,
|
||||||
|
boolean use1904Format)
|
||||||
|
implements ExcelSheet {
|
||||||
|
@Override
|
||||||
|
public int getSheetIndex() {
|
||||||
|
return sheetIndex;
|
||||||
|
}
|
||||||
|
|
||||||
public int getFirstRow() {
|
@Override
|
||||||
return firstRow;
|
public String getName() {
|
||||||
}
|
return sheetName;
|
||||||
|
}
|
||||||
|
|
||||||
public ExcelRow get(int row) {
|
@Override
|
||||||
Row underlyingRow = row < firstRow || row > lastRow ? null : sheet.getRow(row - 1);
|
public int getFirstRow() {
|
||||||
return underlyingRow == null ? null : new ExcelRow(underlyingRow, use1904Format);
|
return firstRow;
|
||||||
}
|
}
|
||||||
|
|
||||||
public Sheet getSheet() {
|
@Override
|
||||||
return sheet;
|
public int getLastRow() {
|
||||||
|
return lastRow;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public ExcelRow get(int row) {
|
||||||
|
return row < firstRow || row > lastRow
|
||||||
|
? null
|
||||||
|
: ExcelRow.forPOIUserModel(sheet, row, use1904Format);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Sheet getSheet() {
|
||||||
|
return sheet;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,6 +1,10 @@
|
|||||||
package org.enso.table.excel;
|
package org.enso.table.excel;
|
||||||
|
|
||||||
import java.time.*;
|
import java.time.LocalDate;
|
||||||
|
import java.time.LocalDateTime;
|
||||||
|
import java.time.LocalTime;
|
||||||
|
import java.time.ZoneId;
|
||||||
|
import java.time.ZonedDateTime;
|
||||||
import java.time.temporal.ChronoUnit;
|
import java.time.temporal.ChronoUnit;
|
||||||
import java.time.temporal.Temporal;
|
import java.time.temporal.Temporal;
|
||||||
import org.apache.poi.ss.usermodel.Workbook;
|
import org.apache.poi.ss.usermodel.Workbook;
|
||||||
|
@ -0,0 +1,123 @@
|
|||||||
|
package org.enso.table.excel;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import org.apache.poi.ss.usermodel.Name;
|
||||||
|
|
||||||
|
/** Represents an Excel workbook. Wraps the underlying Apache POI Workbook object. */
|
||||||
|
public interface ExcelWorkbook {
|
||||||
|
/**
|
||||||
|
* Get the number of spreadsheets in the workbook
|
||||||
|
*
|
||||||
|
* @return the number of sheets
|
||||||
|
*/
|
||||||
|
int getNumberOfSheets();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the index of the sheet by its name
|
||||||
|
*
|
||||||
|
* @param name the sheet name
|
||||||
|
* @return index of the sheet (0 based)
|
||||||
|
*/
|
||||||
|
int getSheetIndex(String name);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the sheet name
|
||||||
|
*
|
||||||
|
* @param sheet sheet number (0 based)
|
||||||
|
* @return Sheet name
|
||||||
|
*/
|
||||||
|
String getSheetName(int sheet);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return the total number of defined names in this workbook
|
||||||
|
*/
|
||||||
|
int getNumberOfNames();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get all the range names in the workbook
|
||||||
|
*
|
||||||
|
* @return an array of range names
|
||||||
|
*/
|
||||||
|
String[] getRangeNames();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the formula for a named range.
|
||||||
|
*
|
||||||
|
* @param name the name of the range.
|
||||||
|
* @return the formula for the range or null if not found.
|
||||||
|
*/
|
||||||
|
String getNameFormula(String name);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get a sheet by its index
|
||||||
|
*
|
||||||
|
* @param sheetIndex the index of the sheet (0 based)
|
||||||
|
* @return the sheet as an ExcelSheet object
|
||||||
|
* @throws IllegalArgumentException if the sheet index is out of range.
|
||||||
|
*/
|
||||||
|
ExcelSheet getSheetAt(int sheetIndex);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Close the underlying input resource (File or Stream), from which the Workbook was read.
|
||||||
|
*
|
||||||
|
* <p>Once this has been called, no further operations, updates or reads should be performed on
|
||||||
|
* the Workbook.
|
||||||
|
*/
|
||||||
|
void close() throws IOException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create an ExcelWorkbook object from an Apache POI Workbook object
|
||||||
|
*
|
||||||
|
* @param workbook the Apache POI Workbook object
|
||||||
|
* @return the ExcelWorkbook object
|
||||||
|
*/
|
||||||
|
static ExcelWorkbook forPOIUserModel(org.apache.poi.ss.usermodel.Workbook workbook) {
|
||||||
|
return new ExcelWorkbookFromPOIUserModel(workbook);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Wraps an Apache POI Workbook object in the ExcelWorkbook interface. */
|
||||||
|
record ExcelWorkbookFromPOIUserModel(org.apache.poi.ss.usermodel.Workbook workbook)
|
||||||
|
implements ExcelWorkbook {
|
||||||
|
@Override
|
||||||
|
public int getNumberOfSheets() {
|
||||||
|
return workbook.getNumberOfSheets();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int getSheetIndex(String name) {
|
||||||
|
return workbook.getSheetIndex(name);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getSheetName(int sheet) {
|
||||||
|
return workbook.getSheetName(sheet);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int getNumberOfNames() {
|
||||||
|
return workbook.getNumberOfNames();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String[] getRangeNames() {
|
||||||
|
var names = workbook.getAllNames();
|
||||||
|
return names.stream().map(Name::getNameName).toArray(String[]::new);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getNameFormula(String name) {
|
||||||
|
var namedRange = workbook.getName(name);
|
||||||
|
return namedRange == null ? null : namedRange.getRefersToFormula();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public ExcelSheet getSheetAt(int sheetIndex) {
|
||||||
|
return ExcelSheet.forPOIUserModel(workbook, sheetIndex);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void close() throws IOException {
|
||||||
|
workbook.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -2,7 +2,6 @@ package org.enso.table.excel;
|
|||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.function.Function;
|
import java.util.function.Function;
|
||||||
import org.apache.poi.ss.usermodel.Workbook;
|
|
||||||
|
|
||||||
public class ReadOnlyExcelConnection implements AutoCloseable {
|
public class ReadOnlyExcelConnection implements AutoCloseable {
|
||||||
|
|
||||||
@ -28,7 +27,7 @@ public class ReadOnlyExcelConnection implements AutoCloseable {
|
|||||||
record = null;
|
record = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
public synchronized <T> T withWorkbook(Function<Workbook, T> f) throws IOException {
|
public synchronized <T> T withWorkbook(Function<ExcelWorkbook, T> f) throws IOException {
|
||||||
if (record == null) {
|
if (record == null) {
|
||||||
throw new IllegalStateException("ReadOnlyExcelConnection is being used after it was closed.");
|
throw new IllegalStateException("ReadOnlyExcelConnection is being used after it was closed.");
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,29 @@
|
|||||||
|
package org.enso.table.excel.xssfreader;
|
||||||
|
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
import org.apache.poi.xssf.model.StylesTable;
|
||||||
|
|
||||||
|
/** Provides the format strings for number formats in an XSSF workbook. */
|
||||||
|
public class XSSFReaderFormats {
|
||||||
|
private final StylesTable stylesTable;
|
||||||
|
private final Map<Short, String> numberFormats = new HashMap<>();
|
||||||
|
|
||||||
|
public XSSFReaderFormats(StylesTable stylesTable) {
|
||||||
|
this.stylesTable = stylesTable;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getNumberFormatAt(short styleIdx) {
|
||||||
|
if (numberFormats.containsKey(styleIdx)) {
|
||||||
|
return numberFormats.get(styleIdx);
|
||||||
|
}
|
||||||
|
|
||||||
|
var style = stylesTable.getStyleAt(styleIdx);
|
||||||
|
var format = style == null ? "General" : style.getDataFormatString();
|
||||||
|
if (format == null || format.equals("General")) {
|
||||||
|
format = "";
|
||||||
|
}
|
||||||
|
numberFormats.put(styleIdx, format);
|
||||||
|
return format;
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,125 @@
|
|||||||
|
package org.enso.table.excel.xssfreader;
|
||||||
|
|
||||||
|
import java.time.LocalDateTime;
|
||||||
|
import java.util.SortedMap;
|
||||||
|
import org.apache.poi.ss.usermodel.Cell;
|
||||||
|
import org.apache.poi.ss.usermodel.DataFormatter;
|
||||||
|
import org.enso.table.excel.ExcelRow;
|
||||||
|
|
||||||
|
public class XSSFReaderRow implements ExcelRow {
|
||||||
|
private static final DataFormatter formatter = new DataFormatter();
|
||||||
|
private final SortedMap<Short, XSSFReaderSheetXMLHandler.CellValue> data;
|
||||||
|
private final boolean use1904Dates;
|
||||||
|
|
||||||
|
public XSSFReaderRow(
|
||||||
|
SortedMap<Short, XSSFReaderSheetXMLHandler.CellValue> data, boolean use1904Dates) {
|
||||||
|
this.data = data;
|
||||||
|
this.use1904Dates = use1904Dates;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int getFirstColumn() {
|
||||||
|
return data.firstKey();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int getLastColumn() {
|
||||||
|
return data.lastKey();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Cell get(int column) {
|
||||||
|
// Not supported as we don't have the underlying Apache POI Cell object.
|
||||||
|
throw new UnsupportedOperationException("XSSFReader does not support getting the Cell object.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Object getCellValue(int column) {
|
||||||
|
var cell = data.get((short) column);
|
||||||
|
if (cell == null) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
var dataType = cell.dataType();
|
||||||
|
return switch (dataType) {
|
||||||
|
case BLANK -> null;
|
||||||
|
case BOOL -> cell.getBooleanValue();
|
||||||
|
case DATE -> LocalDateTime.parse(cell.strValue()); // Don't believe used by Excel.
|
||||||
|
case INLINE_STRING, SST_STRING, FORMULA_STRING -> cell.strValue();
|
||||||
|
case INTEGER -> cell.getIntegerValue();
|
||||||
|
case NUMBER -> {
|
||||||
|
double dbl = cell.getNumberValue();
|
||||||
|
long longVal = (long) dbl;
|
||||||
|
if (dbl == longVal) {
|
||||||
|
yield (long) dbl;
|
||||||
|
} else {
|
||||||
|
yield dbl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
case OLE_DATE -> cell.getDateValue(use1904Dates);
|
||||||
|
case OLE_DATETIME -> cell.getDateTimeValue(use1904Dates);
|
||||||
|
case ERROR -> null;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getCellText(int column) {
|
||||||
|
var cell = data.get((short) column);
|
||||||
|
if (cell == null) {
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
|
||||||
|
var dataType = cell.dataType();
|
||||||
|
return switch (dataType) {
|
||||||
|
case BLANK -> "";
|
||||||
|
case NUMBER, OLE_DATETIME, OLE_DATE, INTEGER -> {
|
||||||
|
// Special handling for Number or Date cells as want to keep formatting.
|
||||||
|
var formatText = cell.format();
|
||||||
|
if (formatText == null || formatText.isEmpty()) {
|
||||||
|
yield cell.strValue();
|
||||||
|
}
|
||||||
|
yield formatter.formatRawCellContents(cell.getNumberValue(), -1, formatText, use1904Dates);
|
||||||
|
}
|
||||||
|
case BOOL -> cell.getBooleanValue() ? "TRUE" : "FALSE";
|
||||||
|
default -> cell.strValue();
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean isEmpty(int column) {
|
||||||
|
var cell = data.get((short) column);
|
||||||
|
return cell == null || cell.strValue().isEmpty();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean isEmpty(int start, int end) {
|
||||||
|
int currentEnd = end == -1 ? getLastColumn() : end;
|
||||||
|
for (int column = Math.max(getFirstColumn(), start);
|
||||||
|
column <= Math.min(getLastColumn(), currentEnd);
|
||||||
|
column++) {
|
||||||
|
if (!isEmpty(column)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String[] getCellsAsText(int startCol, int endCol) {
|
||||||
|
int currentEndCol = endCol == -1 ? getLastColumn() : endCol;
|
||||||
|
|
||||||
|
String[] output = new String[currentEndCol - startCol + 1];
|
||||||
|
for (int col = startCol; col <= currentEndCol; col++) {
|
||||||
|
|
||||||
|
var cell = data.get((short) col);
|
||||||
|
if (cell != null && !cell.dataType().isString()) {
|
||||||
|
// Short circuit if find not a string cell.
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
output[col - startCol] = cell == null ? "" : cell.strValue();
|
||||||
|
}
|
||||||
|
|
||||||
|
return output;
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,150 @@
|
|||||||
|
package org.enso.table.excel.xssfreader;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.SortedMap;
|
||||||
|
import java.util.TreeMap;
|
||||||
|
import javax.xml.parsers.ParserConfigurationException;
|
||||||
|
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
|
||||||
|
import org.apache.poi.ss.usermodel.Sheet;
|
||||||
|
import org.apache.poi.util.XMLHelper;
|
||||||
|
import org.enso.table.excel.ExcelRow;
|
||||||
|
import org.enso.table.excel.ExcelSheet;
|
||||||
|
import org.xml.sax.InputSource;
|
||||||
|
import org.xml.sax.SAXException;
|
||||||
|
|
||||||
|
public class XSSFReaderSheet implements ExcelSheet {
|
||||||
|
private final int sheetIdx;
|
||||||
|
private final String sheetName;
|
||||||
|
private final String relId;
|
||||||
|
private final XSSFReaderWorkbook parent;
|
||||||
|
|
||||||
|
private boolean hasReadSheetData = false;
|
||||||
|
private String dimensions;
|
||||||
|
private int firstRow;
|
||||||
|
private int lastRow;
|
||||||
|
private Map<Integer, SortedMap<Short, XSSFReaderSheetXMLHandler.CellValue>> rowData;
|
||||||
|
|
||||||
|
public XSSFReaderSheet(int sheetIdx, String sheetName, String relId, XSSFReaderWorkbook parent) {
|
||||||
|
this.sheetIdx = sheetIdx;
|
||||||
|
this.sheetName = sheetName;
|
||||||
|
this.relId = relId;
|
||||||
|
this.parent = parent;
|
||||||
|
}
|
||||||
|
|
||||||
|
private synchronized void ensureReadSheetData() {
|
||||||
|
if (hasReadSheetData) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
var strings = parent.getSharedStrings();
|
||||||
|
var styles = parent.getStyles();
|
||||||
|
var handler =
|
||||||
|
new XSSFReaderSheetXMLHandler(styles, strings) {
|
||||||
|
@Override
|
||||||
|
protected void onDimensions(String dimension) {
|
||||||
|
handleOnDimensions(dimension);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void onStartRow(int rowNum) {
|
||||||
|
handleOnStartRow(rowNum);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void onCell(int rowNumber, short columnNumber, String ref, CellValue value) {
|
||||||
|
handleOnCell(rowNumber, columnNumber, value);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
var xmlReader = XMLHelper.newXMLReader();
|
||||||
|
xmlReader.setContentHandler(handler);
|
||||||
|
|
||||||
|
rowData = new HashMap<>();
|
||||||
|
|
||||||
|
try {
|
||||||
|
parent.withReader(
|
||||||
|
reader -> {
|
||||||
|
try {
|
||||||
|
var sheet = reader.getSheet(relId);
|
||||||
|
xmlReader.parse(new InputSource(sheet));
|
||||||
|
} catch (SAXException | InvalidFormatException | IOException e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
|
||||||
|
hasReadSheetData = true;
|
||||||
|
} catch (SAXException | ParserConfigurationException e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int getSheetIndex() {
|
||||||
|
return sheetIdx;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getName() {
|
||||||
|
return sheetName;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getDimensions() {
|
||||||
|
ensureReadSheetData();
|
||||||
|
return dimensions;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int getFirstRow() {
|
||||||
|
ensureReadSheetData();
|
||||||
|
return firstRow;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int getLastRow() {
|
||||||
|
ensureReadSheetData();
|
||||||
|
return lastRow;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public ExcelRow get(int row) {
|
||||||
|
ensureReadSheetData();
|
||||||
|
|
||||||
|
if (!rowData.containsKey(row)) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
return new XSSFReaderRow(rowData.get(row), parent.use1904Format());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Sheet getSheet() {
|
||||||
|
// Not supported as we don't have the underlying Apache POI Sheet object.
|
||||||
|
throw new UnsupportedOperationException(
|
||||||
|
"XSSFReader does not support getting the Sheet object.");
|
||||||
|
}
|
||||||
|
|
||||||
|
protected void handleOnDimensions(String dimension) {
|
||||||
|
dimensions = dimension;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void handleOnStartRow(int rowNum) {
|
||||||
|
if (firstRow == 0 || rowNum < firstRow) {
|
||||||
|
firstRow = rowNum;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (lastRow == 0 || rowNum > lastRow) {
|
||||||
|
lastRow = rowNum;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void handleOnCell(
|
||||||
|
int rowNumber, short columnNumber, XSSFReaderSheetXMLHandler.CellValue value) {
|
||||||
|
rowData.computeIfAbsent(rowNumber, k -> new TreeMap<>()).put(columnNumber, value);
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,259 @@
|
|||||||
|
package org.enso.table.excel.xssfreader;
|
||||||
|
|
||||||
|
import static org.apache.poi.xssf.usermodel.XSSFRelation.NS_SPREADSHEETML;
|
||||||
|
|
||||||
|
import java.time.ZonedDateTime;
|
||||||
|
import java.time.temporal.Temporal;
|
||||||
|
import org.apache.poi.ss.usermodel.DateUtil;
|
||||||
|
import org.apache.poi.xssf.model.SharedStrings;
|
||||||
|
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
|
||||||
|
import org.enso.table.excel.ExcelUtils;
|
||||||
|
import org.xml.sax.Attributes;
|
||||||
|
import org.xml.sax.helpers.DefaultHandler;
|
||||||
|
|
||||||
|
/** Based on the XSSFSheetXMLHandler class from Apache POI. */
|
||||||
|
/**
|
||||||
|
* SAX-based Handler to Read Excel XML on top of POI support. Technical specification can be found
|
||||||
|
* at:
|
||||||
|
* https://learn.microsoft.com/en-us/openspecs/office_standards/ms-oe376/db9b9b72-b10b-4e7e-844c-09f88c972219
|
||||||
|
* https://ecma-international.org/publications-and-standards/standards/ecma-376/
|
||||||
|
*/
|
||||||
|
public class XSSFReaderSheetXMLHandler extends DefaultHandler {
|
||||||
|
private final XSSFReaderFormats styles;
|
||||||
|
private final SharedStrings sharedStrings;
|
||||||
|
|
||||||
|
public enum XSSDataType {
|
||||||
|
BLANK,
|
||||||
|
BOOL,
|
||||||
|
DATE,
|
||||||
|
ERROR,
|
||||||
|
INLINE_STRING,
|
||||||
|
SST_STRING,
|
||||||
|
NUMBER,
|
||||||
|
INTEGER,
|
||||||
|
OLE_DATE,
|
||||||
|
OLE_DATETIME,
|
||||||
|
FORMULA_STRING;
|
||||||
|
|
||||||
|
public boolean isString() {
|
||||||
|
return this == INLINE_STRING || this == SST_STRING || this == FORMULA_STRING;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Record if seen a value element
|
||||||
|
private boolean seenValue;
|
||||||
|
|
||||||
|
// Set when V start element is seen
|
||||||
|
private boolean vIsOpen;
|
||||||
|
|
||||||
|
// Set when an Inline String "is" is seen
|
||||||
|
private boolean isIsOpen;
|
||||||
|
|
||||||
|
// The current row being read (or -1 if not in a row)
|
||||||
|
private int rowNumber = -1;
|
||||||
|
|
||||||
|
// Handle missing rowNumber in the XML (happens in Excel), first row would be row 1.
|
||||||
|
private int nextRowNumber = 1;
|
||||||
|
|
||||||
|
// The current cell being read (or null if not in a cell)
|
||||||
|
private String cellRef;
|
||||||
|
|
||||||
|
// Set when cell start element is seen, used when cell close element is seen.
|
||||||
|
private XSSDataType dataType;
|
||||||
|
|
||||||
|
// Gathers characters as they are seen.
|
||||||
|
private final StringBuilder value = new StringBuilder(64);
|
||||||
|
private String numberFormat = null;
|
||||||
|
|
||||||
|
public XSSFReaderSheetXMLHandler(XSSFReaderFormats styles, SharedStrings strings) {
|
||||||
|
this.styles = styles;
|
||||||
|
this.sharedStrings = strings;
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean isTextTag(String name) {
|
||||||
|
return "v".equals(name) || "inlineStr".equals(name) || ("t".equals(name) && isIsOpen);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void startElement(String uri, String localName, String qName, Attributes attributes) {
|
||||||
|
if (uri != null && !NS_SPREADSHEETML.equals(uri)) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (isTextTag(localName)) {
|
||||||
|
seenValue = true;
|
||||||
|
vIsOpen = true;
|
||||||
|
if (!isIsOpen) {
|
||||||
|
value.setLength(0);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
switch (localName) {
|
||||||
|
case "dimension": // Dimensions of sheet
|
||||||
|
var dimension = attributes.getValue("ref");
|
||||||
|
if (dimension != null) {
|
||||||
|
onDimensions(dimension);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case "row": // Row
|
||||||
|
String rowNumStr = attributes.getValue("r");
|
||||||
|
rowNumber = rowNumStr == null ? nextRowNumber : Integer.parseInt(rowNumStr);
|
||||||
|
onStartRow(rowNumber);
|
||||||
|
break;
|
||||||
|
case "c": // Cell
|
||||||
|
cellRef = attributes.getValue("r");
|
||||||
|
seenValue = false;
|
||||||
|
|
||||||
|
String cellType = attributes.getValue("t");
|
||||||
|
if (cellType == null) {
|
||||||
|
cellType = "n"; // Number is default
|
||||||
|
}
|
||||||
|
|
||||||
|
dataType =
|
||||||
|
switch (cellType) {
|
||||||
|
case "b" -> XSSDataType.BOOL;
|
||||||
|
case "e" -> XSSDataType.ERROR;
|
||||||
|
case "d" -> XSSDataType.DATE; // Date in ISO 8601 format.
|
||||||
|
case "inlineStr" -> XSSDataType.INLINE_STRING;
|
||||||
|
case "s" -> XSSDataType.SST_STRING;
|
||||||
|
case "str" -> XSSDataType.FORMULA_STRING; // String formula
|
||||||
|
default -> XSSDataType.NUMBER;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Read the format for NUMBER
|
||||||
|
numberFormat = null;
|
||||||
|
if (dataType == XSSDataType.NUMBER) {
|
||||||
|
String cellStyleStr = attributes.getValue("s");
|
||||||
|
if (cellStyleStr != null) {
|
||||||
|
short styleIndex = (short) Integer.parseInt(cellStyleStr);
|
||||||
|
numberFormat = styles.getNumberFormatAt(styleIndex);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case "is": // Inline String
|
||||||
|
isIsOpen = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Captures characters if a suitable element is open. */
|
||||||
|
@Override
|
||||||
|
public void characters(char[] ch, int start, int length) {
|
||||||
|
if (vIsOpen) {
|
||||||
|
value.append(ch, start, length);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void endElement(String uri, String localName, String qName) {
|
||||||
|
if (uri != null && !NS_SPREADSHEETML.equals(uri)) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (isTextTag(localName)) {
|
||||||
|
vIsOpen = false;
|
||||||
|
} else {
|
||||||
|
switch (localName) {
|
||||||
|
case "sheetData" -> onSheetEnd();
|
||||||
|
case "row" -> {
|
||||||
|
nextRowNumber = rowNumber + 1;
|
||||||
|
rowNumber = -1;
|
||||||
|
}
|
||||||
|
case "c" -> outputCellValue();
|
||||||
|
case "is" -> isIsOpen = false;
|
||||||
|
case "v" -> vIsOpen = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public record CellValue(XSSDataType dataType, String strValue, String format) {
|
||||||
|
public boolean getBooleanValue() {
|
||||||
|
return strValue.charAt(0) == '1';
|
||||||
|
}
|
||||||
|
|
||||||
|
public double getNumberValue() {
|
||||||
|
return Double.parseDouble(strValue);
|
||||||
|
}
|
||||||
|
|
||||||
|
public long getIntegerValue() {
|
||||||
|
return Long.parseLong(strValue);
|
||||||
|
}
|
||||||
|
|
||||||
|
public Temporal getDateValue(boolean use1904Dates) {
|
||||||
|
return use1904Dates
|
||||||
|
? ExcelUtils.fromExcelDateTime1904(getIntegerValue())
|
||||||
|
: ExcelUtils.fromExcelDateTime(getIntegerValue());
|
||||||
|
}
|
||||||
|
|
||||||
|
public Temporal getDateTimeValue(boolean use1904Dates) {
|
||||||
|
if (use1904Dates) {
|
||||||
|
var datetime = ExcelUtils.fromExcelDateTime1904(getNumberValue());
|
||||||
|
if (datetime instanceof ZonedDateTime zdt
|
||||||
|
&& zdt.getYear() == 1904
|
||||||
|
&& zdt.getDayOfYear() == 1
|
||||||
|
&& !format.contains("y")
|
||||||
|
&& !format.contains("M")
|
||||||
|
&& !format.contains("d")) {
|
||||||
|
datetime = zdt.toLocalTime();
|
||||||
|
}
|
||||||
|
return datetime;
|
||||||
|
}
|
||||||
|
|
||||||
|
return ExcelUtils.fromExcelDateTime(getNumberValue());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getStringValue() {
|
||||||
|
if (dataType == XSSDataType.SST_STRING) {
|
||||||
|
return getSharedString(value.toString());
|
||||||
|
} else if (dataType == XSSDataType.INLINE_STRING) {
|
||||||
|
return new XSSFRichTextString(value.toString()).toString();
|
||||||
|
}
|
||||||
|
return value.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
private String getSharedString(String value) {
|
||||||
|
int idx = Integer.parseInt(value);
|
||||||
|
var ss = sharedStrings.getItemAt(idx);
|
||||||
|
return ss == null ? null : ss.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
private void outputCellValue() {
|
||||||
|
short columnNumber = 0;
|
||||||
|
int i = 0;
|
||||||
|
char c;
|
||||||
|
while (i < cellRef.length() && (c = cellRef.charAt(i)) >= 'A' && c <= 'Z') {
|
||||||
|
columnNumber = (short) (columnNumber * 26 + (c - 'A' + 1));
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!seenValue) {
|
||||||
|
onCell(rowNumber, columnNumber, cellRef, new CellValue(XSSDataType.BLANK, "", null));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
var stringValue = getStringValue();
|
||||||
|
if (dataType == XSSDataType.NUMBER) {
|
||||||
|
boolean isInteger = !stringValue.contains(".");
|
||||||
|
boolean isDate = DateUtil.isADateFormat(-1, numberFormat);
|
||||||
|
if (isInteger && isDate) {
|
||||||
|
dataType = XSSDataType.OLE_DATE;
|
||||||
|
} else if (isInteger) {
|
||||||
|
dataType = XSSDataType.INTEGER;
|
||||||
|
} else if (isDate) {
|
||||||
|
dataType = XSSDataType.OLE_DATETIME;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
var cellValue = new CellValue(dataType, stringValue, numberFormat);
|
||||||
|
onCell(rowNumber, columnNumber, cellRef, cellValue);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected void onDimensions(String dimension) {}
|
||||||
|
|
||||||
|
protected void onStartRow(int rowNumber) {}
|
||||||
|
|
||||||
|
protected void onCell(int rowNumber, short columnNumber, String ref, CellValue cellValue) {}
|
||||||
|
|
||||||
|
protected void onSheetEnd() {}
|
||||||
|
}
|
@ -0,0 +1,284 @@
|
|||||||
|
package org.enso.table.excel.xssfreader;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Iterator;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.function.Consumer;
|
||||||
|
import javax.xml.XMLConstants;
|
||||||
|
import javax.xml.namespace.NamespaceContext;
|
||||||
|
import javax.xml.xpath.XPathConstants;
|
||||||
|
import javax.xml.xpath.XPathExpression;
|
||||||
|
import javax.xml.xpath.XPathExpressionException;
|
||||||
|
import javax.xml.xpath.XPathFactory;
|
||||||
|
import org.apache.poi.ooxml.util.DocumentHelper;
|
||||||
|
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
|
||||||
|
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
|
||||||
|
import org.apache.poi.openxml4j.opc.OPCPackage;
|
||||||
|
import org.apache.poi.openxml4j.opc.PackageAccess;
|
||||||
|
import org.apache.poi.ss.usermodel.RichTextString;
|
||||||
|
import org.apache.poi.xssf.eventusermodel.XSSFReader;
|
||||||
|
import org.apache.poi.xssf.model.SharedStrings;
|
||||||
|
import org.apache.poi.xssf.usermodel.XSSFRelation;
|
||||||
|
import org.enso.table.excel.ExcelSheet;
|
||||||
|
import org.enso.table.excel.ExcelWorkbook;
|
||||||
|
import org.w3c.dom.Document;
|
||||||
|
import org.w3c.dom.Node;
|
||||||
|
import org.w3c.dom.NodeList;
|
||||||
|
import org.xml.sax.SAXException;
|
||||||
|
|
||||||
|
public class XSSFReaderWorkbook implements ExcelWorkbook {
|
||||||
|
private static final XPathFactory xpathFactory = XPathFactory.newInstance();
|
||||||
|
private static final NamespaceContext namespaceContext = new SpreadsheetContext();
|
||||||
|
private static final Map<String, XPathExpression> xpathCache = new HashMap<>();
|
||||||
|
|
||||||
|
private static XPathExpression compileXPathWithNamespace(String xpath)
|
||||||
|
throws XPathExpressionException {
|
||||||
|
if (!xpathCache.containsKey(xpath)) {
|
||||||
|
var newXPath = xpathFactory.newXPath();
|
||||||
|
newXPath.setNamespaceContext(namespaceContext);
|
||||||
|
var compiled = newXPath.compile(xpath);
|
||||||
|
xpathCache.put(xpath, compiled);
|
||||||
|
}
|
||||||
|
return xpathCache.get(xpath);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static class SpreadsheetContext implements NamespaceContext {
|
||||||
|
@Override
|
||||||
|
public String getNamespaceURI(String prefix) {
|
||||||
|
if (prefix == null) {
|
||||||
|
throw new IllegalArgumentException("prefix cannot be null");
|
||||||
|
}
|
||||||
|
return prefix.equals("ss") ? XSSFRelation.NS_SPREADSHEETML : XMLConstants.NULL_NS_URI;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getPrefix(String namespaceURI) {
|
||||||
|
if (namespaceURI == null) {
|
||||||
|
throw new IllegalArgumentException("namespaceURI cannot be null");
|
||||||
|
}
|
||||||
|
return namespaceURI.equals(XSSFRelation.NS_SPREADSHEETML) ? "ss" : null;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Iterator<String> getPrefixes(String namespaceURI) {
|
||||||
|
if (namespaceURI == null) {
|
||||||
|
throw new IllegalArgumentException("namespaceURI cannot be null");
|
||||||
|
}
|
||||||
|
return namespaceURI.equals(XSSFRelation.NS_SPREADSHEETML)
|
||||||
|
? Collections.singleton("ss").iterator()
|
||||||
|
: Arrays.stream(new String[0]).iterator();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static final String WORKBOOK_CONFIG_XPATH = "/ss:workbook/ss:workbookPr";
|
||||||
|
public static final String SHEET_NAME_XPATH = "/ss:workbook/ss:sheets/ss:sheet";
|
||||||
|
public static final String NAMED_RANGE_XPATH = "/ss:workbook/ss:definedNames/ss:definedName";
|
||||||
|
|
||||||
|
private final String path;
|
||||||
|
|
||||||
|
private boolean use1904DateSystemFlag = false;
|
||||||
|
private List<SheetInfo> sheetInfos;
|
||||||
|
private Map<String, SheetInfo> sheetInfoMap;
|
||||||
|
private Map<String, NamedRange> namedRangeMap;
|
||||||
|
|
||||||
|
private boolean hasReadShared = false;
|
||||||
|
private SharedStrings sharedStrings;
|
||||||
|
private XSSFReaderFormats styles;
|
||||||
|
|
||||||
|
public XSSFReaderWorkbook(String path) throws IOException {
|
||||||
|
this.path = path;
|
||||||
|
|
||||||
|
// Read the workbook data
|
||||||
|
this.readWorkbookData();
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getPath() {
|
||||||
|
return path;
|
||||||
|
}
|
||||||
|
|
||||||
|
void withReader(Consumer<XSSFReader> action) throws IOException {
|
||||||
|
try (var pkg = OPCPackage.open(path, PackageAccess.READ)) {
|
||||||
|
var reader = new XSSFReader(pkg);
|
||||||
|
action.accept(reader);
|
||||||
|
} catch (OpenXML4JException e) {
|
||||||
|
throw new IOException(
|
||||||
|
"Invalid format encountered when opening the file " + path + " as XLSX.", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private record SheetInfo(int index, int sheetId, String name, String relID, boolean visible) {}
|
||||||
|
|
||||||
|
private record NamedRange(String name, String formula) {}
|
||||||
|
|
||||||
|
private void readWorkbookData() throws IOException {
|
||||||
|
withReader(
|
||||||
|
reader -> {
|
||||||
|
try {
|
||||||
|
var workbookData = reader.getWorkbookData();
|
||||||
|
var workbookDoc = DocumentHelper.readDocument(workbookData);
|
||||||
|
read1904DateSetting(workbookDoc);
|
||||||
|
readSheetInfo(workbookDoc);
|
||||||
|
readNamedRanges(workbookDoc);
|
||||||
|
} catch (SAXException
|
||||||
|
| IOException
|
||||||
|
| InvalidFormatException
|
||||||
|
| XPathExpressionException e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
private void readNamedRanges(Document workbookDoc) throws XPathExpressionException {
|
||||||
|
var namesXPath = compileXPathWithNamespace(NAMED_RANGE_XPATH);
|
||||||
|
var nameNodes = (NodeList) namesXPath.evaluate(workbookDoc, XPathConstants.NODESET);
|
||||||
|
namedRangeMap = new HashMap<>();
|
||||||
|
for (int i = 0; i < nameNodes.getLength(); i++) {
|
||||||
|
var node = nameNodes.item(i);
|
||||||
|
var name = node.getAttributes().getNamedItem("name").getNodeValue();
|
||||||
|
var formula = node.getTextContent();
|
||||||
|
namedRangeMap.put(name, new NamedRange(name, formula));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void readSheetInfo(Document workbookDoc) throws XPathExpressionException {
|
||||||
|
var sheetXPath = compileXPathWithNamespace(SHEET_NAME_XPATH);
|
||||||
|
var sheetNodes = (NodeList) sheetXPath.evaluate(workbookDoc, XPathConstants.NODESET);
|
||||||
|
sheetInfos = new ArrayList<>(sheetNodes.getLength());
|
||||||
|
sheetInfoMap = new HashMap<>();
|
||||||
|
for (int i = 0; i < sheetNodes.getLength(); i++) {
|
||||||
|
var node = sheetNodes.item(i);
|
||||||
|
var sheetName = node.getAttributes().getNamedItem("name").getNodeValue();
|
||||||
|
var sheetId = Integer.parseInt(node.getAttributes().getNamedItem("sheetId").getNodeValue());
|
||||||
|
var relId = node.getAttributes().getNamedItem("r:id").getNodeValue();
|
||||||
|
var visible = node.getAttributes().getNamedItem("state") == null;
|
||||||
|
var sheetInfo = new SheetInfo(i, sheetId, sheetName, relId, visible);
|
||||||
|
sheetInfos.add(sheetInfo);
|
||||||
|
sheetInfoMap.put(sheetName, sheetInfo);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void read1904DateSetting(Document workbookDoc) throws XPathExpressionException {
|
||||||
|
var workbookXPath = compileXPathWithNamespace(WORKBOOK_CONFIG_XPATH);
|
||||||
|
var workbookNode = (Node) workbookXPath.evaluate(workbookDoc, XPathConstants.NODE);
|
||||||
|
if (workbookNode != null) {
|
||||||
|
var date1904 = workbookNode.getAttributes().getNamedItem("date1904");
|
||||||
|
use1904DateSystemFlag = date1904 != null && "1".equals(date1904.getNodeValue());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private synchronized void ensureReadShared() {
|
||||||
|
if (hasReadShared) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
withReader(
|
||||||
|
reader -> {
|
||||||
|
try {
|
||||||
|
reader.setUseReadOnlySharedStringsTable(true);
|
||||||
|
sharedStrings = reader.getSharedStringsTable();
|
||||||
|
if (sharedStrings == null) {
|
||||||
|
sharedStrings =
|
||||||
|
new SharedStrings() {
|
||||||
|
@Override
|
||||||
|
public RichTextString getItemAt(int idx) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int getCount() {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int getUniqueCount() {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read the styles table and attach the format data
|
||||||
|
var stylesTable = reader.getStylesTable();
|
||||||
|
styles = new XSSFReaderFormats(stylesTable);
|
||||||
|
|
||||||
|
hasReadShared = true;
|
||||||
|
} catch (InvalidFormatException | IOException e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Flag that workbook is in 1904 format. */
|
||||||
|
boolean use1904Format() {
|
||||||
|
return use1904DateSystemFlag;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int getNumberOfSheets() {
|
||||||
|
return sheetInfoMap.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int getSheetIndex(String name) {
|
||||||
|
if (!sheetInfoMap.containsKey(name)) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
return sheetInfoMap.get(name).index;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getSheetName(int sheet) {
|
||||||
|
if (sheet < 0 || sheet >= sheetInfos.size()) {
|
||||||
|
throw new IllegalArgumentException("Sheet index out of range: " + sheet);
|
||||||
|
}
|
||||||
|
return sheetInfos.get(sheet).name;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int getNumberOfNames() {
|
||||||
|
return namedRangeMap.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String[] getRangeNames() {
|
||||||
|
return namedRangeMap.keySet().toArray(String[]::new);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getNameFormula(String name) {
|
||||||
|
var namedRange = namedRangeMap.get(name);
|
||||||
|
return namedRange == null ? null : namedRange.formula;
|
||||||
|
}
|
||||||
|
|
||||||
|
public SharedStrings getSharedStrings() {
|
||||||
|
ensureReadShared();
|
||||||
|
return sharedStrings;
|
||||||
|
}
|
||||||
|
|
||||||
|
public XSSFReaderFormats getStyles() {
|
||||||
|
ensureReadShared();
|
||||||
|
return styles;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public ExcelSheet getSheetAt(int sheetIndex) {
|
||||||
|
if (sheetIndex < 0 || sheetIndex >= sheetInfos.size()) {
|
||||||
|
throw new IllegalArgumentException("Sheet index out of range: " + sheetIndex);
|
||||||
|
}
|
||||||
|
var sheetInfo = sheetInfos.get(sheetIndex);
|
||||||
|
return new XSSFReaderSheet(sheetIndex, sheetInfo.name, sheetInfo.relID, this);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void close() throws IOException {
|
||||||
|
// Nothing to do
|
||||||
|
}
|
||||||
|
}
|
@ -7,9 +7,6 @@ import java.util.List;
|
|||||||
import java.util.function.Function;
|
import java.util.function.Function;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
import java.util.stream.IntStream;
|
import java.util.stream.IntStream;
|
||||||
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
|
|
||||||
import org.apache.poi.ss.usermodel.Name;
|
|
||||||
import org.apache.poi.ss.usermodel.Workbook;
|
|
||||||
import org.apache.poi.ss.util.CellReference;
|
import org.apache.poi.ss.util.CellReference;
|
||||||
import org.enso.table.data.column.builder.Builder;
|
import org.enso.table.data.column.builder.Builder;
|
||||||
import org.enso.table.data.column.builder.InferredBuilder;
|
import org.enso.table.data.column.builder.InferredBuilder;
|
||||||
@ -24,6 +21,7 @@ import org.enso.table.excel.ExcelHeaders;
|
|||||||
import org.enso.table.excel.ExcelRange;
|
import org.enso.table.excel.ExcelRange;
|
||||||
import org.enso.table.excel.ExcelRow;
|
import org.enso.table.excel.ExcelRow;
|
||||||
import org.enso.table.excel.ExcelSheet;
|
import org.enso.table.excel.ExcelSheet;
|
||||||
|
import org.enso.table.excel.ExcelWorkbook;
|
||||||
import org.enso.table.excel.ReadOnlyExcelConnection;
|
import org.enso.table.excel.ReadOnlyExcelConnection;
|
||||||
import org.enso.table.problems.ProblemAggregator;
|
import org.enso.table.problems.ProblemAggregator;
|
||||||
import org.graalvm.polyglot.Context;
|
import org.graalvm.polyglot.Context;
|
||||||
@ -38,18 +36,17 @@ public class ExcelReader {
|
|||||||
* @return a String[] containing the sheet names.
|
* @return a String[] containing the sheet names.
|
||||||
* @throws IOException when the action fails
|
* @throws IOException when the action fails
|
||||||
*/
|
*/
|
||||||
public static String[] readSheetNames(File file, ExcelFileFormat format)
|
public static String[] readSheetNames(File file, ExcelFileFormat format) throws IOException {
|
||||||
throws IOException, InvalidFormatException {
|
|
||||||
return withWorkbook(file, format, ExcelReader::readSheetNames);
|
return withWorkbook(file, format, ExcelReader::readSheetNames);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Reads a list of sheet names from a workbook into an array.
|
* Reads a list of sheet names from a workbook into an array.
|
||||||
*
|
*
|
||||||
* @param workbook a {@link Workbook} to read the sheet names from.
|
* @param workbook a {@link ExcelWorkbook} to read the sheet names from.
|
||||||
* @return a String[] containing the sheet names.
|
* @return a String[] containing the sheet names.
|
||||||
*/
|
*/
|
||||||
public static String[] readSheetNames(Workbook workbook) {
|
public static String[] readSheetNames(ExcelWorkbook workbook) {
|
||||||
int sheetCount = workbook.getNumberOfSheets();
|
int sheetCount = workbook.getNumberOfSheets();
|
||||||
var output = new String[sheetCount];
|
var output = new String[sheetCount];
|
||||||
Context context = Context.getCurrent();
|
Context context = Context.getCurrent();
|
||||||
@ -68,20 +65,8 @@ public class ExcelReader {
|
|||||||
* @return a String[] containing the range names.
|
* @return a String[] containing the range names.
|
||||||
* @throws IOException when the action fails
|
* @throws IOException when the action fails
|
||||||
*/
|
*/
|
||||||
public static String[] readRangeNames(File file, ExcelFileFormat format)
|
public static String[] readRangeNames(File file, ExcelFileFormat format) throws IOException {
|
||||||
throws IOException, InvalidFormatException {
|
return withWorkbook(file, format, ExcelWorkbook::getRangeNames);
|
||||||
return withWorkbook(file, format, ExcelReader::readRangeNames);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Reads a list of range names for the specified XLSX/XLS file into an array.
|
|
||||||
*
|
|
||||||
* @param workbook a {@link Workbook} to read the sheet names from.
|
|
||||||
* @return a String[] containing the range names.
|
|
||||||
*/
|
|
||||||
public static String[] readRangeNames(Workbook workbook) {
|
|
||||||
var names = workbook.getAllNames();
|
|
||||||
return names.stream().map(Name::getNameName).toArray(String[]::new);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -202,7 +187,7 @@ public class ExcelReader {
|
|||||||
/**
|
/**
|
||||||
* Reads a range by sheet name, named range or address for the workbook into a table.
|
* Reads a range by sheet name, named range or address for the workbook into a table.
|
||||||
*
|
*
|
||||||
* @param workbook a {@link Workbook} to read from.
|
* @param workbook a {@link ExcelWorkbook} to read from.
|
||||||
* @param rangeNameOrAddress sheet name, range name or address to read.
|
* @param rangeNameOrAddress sheet name, range name or address to read.
|
||||||
* @param headers specifies whether the first row should be used as headers.
|
* @param headers specifies whether the first row should be used as headers.
|
||||||
* @param skip_rows skip rows from the top of the range.
|
* @param skip_rows skip rows from the top of the range.
|
||||||
@ -211,7 +196,7 @@ public class ExcelReader {
|
|||||||
* @throws InvalidLocationException when the range name or address is not found.
|
* @throws InvalidLocationException when the range name or address is not found.
|
||||||
*/
|
*/
|
||||||
public static Table readRangeByName(
|
public static Table readRangeByName(
|
||||||
Workbook workbook,
|
ExcelWorkbook workbook,
|
||||||
String rangeNameOrAddress,
|
String rangeNameOrAddress,
|
||||||
ExcelHeaders.HeaderBehavior headers,
|
ExcelHeaders.HeaderBehavior headers,
|
||||||
int skip_rows,
|
int skip_rows,
|
||||||
@ -230,11 +215,10 @@ public class ExcelReader {
|
|||||||
problemAggregator);
|
problemAggregator);
|
||||||
}
|
}
|
||||||
|
|
||||||
Name name = workbook.getName(rangeNameOrAddress);
|
|
||||||
|
|
||||||
ExcelRange excelRange;
|
ExcelRange excelRange;
|
||||||
try {
|
try {
|
||||||
excelRange = new ExcelRange(name == null ? rangeNameOrAddress : name.getRefersToFormula());
|
var formula = workbook.getNameFormula(rangeNameOrAddress);
|
||||||
|
excelRange = new ExcelRange(formula == null ? rangeNameOrAddress : formula);
|
||||||
} catch (IllegalArgumentException e) {
|
} catch (IllegalArgumentException e) {
|
||||||
throw new InvalidLocationException(
|
throw new InvalidLocationException(
|
||||||
rangeNameOrAddress,
|
rangeNameOrAddress,
|
||||||
@ -271,8 +255,8 @@ public class ExcelReader {
|
|||||||
readRange(workbook, excelRange, headers, skip_rows, row_limit, problemAggregator));
|
readRange(workbook, excelRange, headers, skip_rows, row_limit, problemAggregator));
|
||||||
}
|
}
|
||||||
|
|
||||||
private static <T> T withWorkbook(File file, ExcelFileFormat format, Function<Workbook, T> action)
|
private static <T> T withWorkbook(
|
||||||
throws IOException {
|
File file, ExcelFileFormat format, Function<ExcelWorkbook, T> action) throws IOException {
|
||||||
try (ReadOnlyExcelConnection connection =
|
try (ReadOnlyExcelConnection connection =
|
||||||
ExcelConnectionPool.INSTANCE.openReadOnlyConnection(file, format)) {
|
ExcelConnectionPool.INSTANCE.openReadOnlyConnection(file, format)) {
|
||||||
return connection.withWorkbook(action);
|
return connection.withWorkbook(action);
|
||||||
@ -280,7 +264,7 @@ public class ExcelReader {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public static Table readRange(
|
public static Table readRange(
|
||||||
Workbook workbook,
|
ExcelWorkbook workbook,
|
||||||
ExcelRange excelRange,
|
ExcelRange excelRange,
|
||||||
ExcelHeaders.HeaderBehavior headers,
|
ExcelHeaders.HeaderBehavior headers,
|
||||||
int skip_rows,
|
int skip_rows,
|
||||||
@ -304,7 +288,7 @@ public class ExcelReader {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private static Table readTable(
|
private static Table readTable(
|
||||||
Workbook workbook,
|
ExcelWorkbook workbook,
|
||||||
int sheetIndex,
|
int sheetIndex,
|
||||||
ExcelRange excelRange,
|
ExcelRange excelRange,
|
||||||
ExcelHeaders.HeaderBehavior headers,
|
ExcelHeaders.HeaderBehavior headers,
|
||||||
@ -312,7 +296,7 @@ public class ExcelReader {
|
|||||||
int rowCount,
|
int rowCount,
|
||||||
ProblemAggregator problemAggregator) {
|
ProblemAggregator problemAggregator) {
|
||||||
|
|
||||||
ExcelSheet sheet = new ExcelSheet(workbook, sheetIndex);
|
ExcelSheet sheet = workbook.getSheetAt(sheetIndex);
|
||||||
|
|
||||||
// Expand Single Cell
|
// Expand Single Cell
|
||||||
if (excelRange != null && excelRange.isSingleCell()) {
|
if (excelRange != null && excelRange.isSingleCell()) {
|
||||||
|
@ -77,7 +77,8 @@ public class ExcelWriter {
|
|||||||
headers =
|
headers =
|
||||||
headers != ExcelHeaders.HeaderBehavior.INFER
|
headers != ExcelHeaders.HeaderBehavior.INFER
|
||||||
? headers
|
? headers
|
||||||
: shouldWriteHeaders(new ExcelSheet(workbook, sheetIndex), firstRow + 1, 1, -1);
|
: shouldWriteHeaders(
|
||||||
|
ExcelSheet.forPOIUserModel(workbook, sheetIndex), firstRow + 1, 1, -1);
|
||||||
|
|
||||||
String sheetName = workbook.getSheetName(sheetIndex - 1);
|
String sheetName = workbook.getSheetName(sheetIndex - 1);
|
||||||
workbook.removeSheetAt(sheetIndex - 1);
|
workbook.removeSheetAt(sheetIndex - 1);
|
||||||
@ -130,7 +131,8 @@ public class ExcelWriter {
|
|||||||
headers =
|
headers =
|
||||||
headers != ExcelHeaders.HeaderBehavior.INFER
|
headers != ExcelHeaders.HeaderBehavior.INFER
|
||||||
? headers
|
? headers
|
||||||
: shouldWriteHeaders(new ExcelSheet(workbook, sheetIndex), firstRow + 1, 1, -1);
|
: shouldWriteHeaders(
|
||||||
|
ExcelSheet.forPOIUserModel(workbook, sheetIndex), firstRow + 1, 1, -1);
|
||||||
|
|
||||||
workbook.removeSheetAt(sheetIndex);
|
workbook.removeSheetAt(sheetIndex);
|
||||||
Sheet sheet = workbook.createSheet(sheetName);
|
Sheet sheet = workbook.createSheet(sheetName);
|
||||||
@ -198,7 +200,7 @@ public class ExcelWriter {
|
|||||||
throw new InvalidLocationException(
|
throw new InvalidLocationException(
|
||||||
range.getSheetName(), "Unknown sheet '" + range.getSheetName() + "'.");
|
range.getSheetName(), "Unknown sheet '" + range.getSheetName() + "'.");
|
||||||
}
|
}
|
||||||
ExcelSheet sheet = new ExcelSheet(workbook, sheetIndex);
|
ExcelSheet sheet = ExcelSheet.forPOIUserModel(workbook, sheetIndex);
|
||||||
|
|
||||||
if (skipRows != 0) {
|
if (skipRows != 0) {
|
||||||
if (range.isWholeColumn()) {
|
if (range.isWholeColumn()) {
|
||||||
|
@ -1112,7 +1112,7 @@ add_specs suite_builder =
|
|||||||
|
|
||||||
Problems.expect_warning Duplicate_Output_Column_Names r3
|
Problems.expect_warning Duplicate_Output_Column_Names r3
|
||||||
Problems.expect_warning Column_Count_Mismatch r3
|
Problems.expect_warning Column_Count_Mismatch r3
|
||||||
|
|
||||||
group_builder.specify "during `read_many`, should correctly handle empty sheets" <|
|
group_builder.specify "during `read_many`, should correctly handle empty sheets" <|
|
||||||
with_temp_dir base_dir->
|
with_temp_dir base_dir->
|
||||||
tsv_file = base_dir / "1.tsv"
|
tsv_file = base_dir / "1.tsv"
|
||||||
|
Loading…
Reference in New Issue
Block a user