Tables: column mapping & masking (#1297)

This commit is contained in:
Marcin Kostrzewa 2020-11-18 15:09:43 +01:00 committed by GitHub
parent cf9be4ff29
commit ab2c5ed097
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
40 changed files with 1617 additions and 314 deletions

View File

@ -1,5 +1,6 @@
from Base import all
polyglot java import java.lang.Math
polyglot java import java.lang.Double
polyglot java import java.lang.String
## Computes the inverse of the sine function
@ -95,3 +96,8 @@ Number.max that = if this > that then this else that
Number.to_json : Json.Number
Number.to_json = Json.Number this
# The conversion is delegated to Java's `Double.parseDouble`. A panic raised by
# that call is recovered and mapped to `Nothing` instead of being propagated.
## Parses a textual representation of a decimal into a decimal number.
Returns `Nothing` if the text does not represent a valid decimal.
Decimal.parse : Text -> Decimal | Nothing
Decimal.parse text =
Panic.recover (Double.parseDouble [text]) . catch (_ -> Nothing)

View File

@ -176,3 +176,47 @@ Text.to_json = Json.String this
Text.repeat : Integer -> Text
Text.repeat count =
0.up_to count . fold "" acc-> _-> acc + this
## Returns `this` without its first `count` characters.

   Character boundaries are located with a character-level `BreakIterator`.
   When `this` holds fewer than `count` characters the iterator reports `-1`
   and an empty text is returned.
Text.drop_first : Integer -> Text
Text.drop_first count =
    breaker = BreakIterator.getCharacterInstance []
    breaker.setText [this]
    breaker.first []
    cut_index = breaker.next [count]
    if cut_index == -1 then '' else Text_Utils.drop_first [this, cut_index]
## Returns `this` without its last `count` characters.

   The boundary is found by walking a character-level `BreakIterator`
   backwards from the end of the text. When `this` holds fewer than `count`
   characters the iterator reports `-1` and an empty text is returned.
Text.drop_last : Integer -> Text
Text.drop_last count =
    breaker = BreakIterator.getCharacterInstance []
    breaker.setText [this]
    breaker.last []
    cut_index = breaker.next [-count]
    if cut_index == -1 then '' else Text_Utils.substring [this, 0, cut_index]
## Returns the first `count` characters of `this`.

   Character boundaries are located with a character-level `BreakIterator`.
   When `this` holds fewer than `count` characters the iterator reports `-1`
   and the whole of `this` is returned.
Text.take_first : Integer -> Text
Text.take_first count =
    breaker = BreakIterator.getCharacterInstance []
    breaker.setText [this]
    breaker.first []
    end_index = breaker.next [count]
    if end_index == -1 then this else Text_Utils.substring [this, 0, end_index]
## Returns the last `count` characters of `this`.

   The boundary is found by walking a character-level `BreakIterator`
   backwards from the end of the text. When `this` holds fewer than `count`
   characters the iterator reports `-1` and the whole of `this` is returned.
Text.take_last : Integer -> Text
Text.take_last count =
    breaker = BreakIterator.getCharacterInstance []
    breaker.setText [this]
    breaker.last []
    start_index = breaker.next [-count]
    if start_index == -1 then this else Text_Utils.drop_first [this, start_index]

View File

@ -28,8 +28,8 @@ export Base.System.File
from Base.Data.Any.Extensions export all
from Base.Data.List export Nil, Cons
from Base.Data.Number.Extensions export all hiding Math, String, Double
from Base.Data.Noise export all hiding Noise
from Base.Data.Number.Extensions export all hiding Math, String
from Base.Data.Pair export Pair
from Base.Data.Range export Range
from Base.Data.Text.Extensions export Text

View File

@ -297,3 +297,6 @@ read path = (here.new path).read
current_directory : File
current_directory = File (Prim_Io.get_cwd)
# The path text comes from the `Prim_Io.get_user_home` builtin and is wrapped
# into a `File` through `here.new`.
## Returns the home directory of the current user.
home : File
home = here.new (Prim_Io.get_user_home)

View File

@ -0,0 +1,143 @@
from Base import all
import Table.Data.Table
polyglot java import org.enso.table.data.table.Column as Java_Column
## A single named column of values, wrapping a Java column object.
type Column
    type Column java_column

    ## Renders this column as an ASCII-art table and returns it as text.

       Arguments:
       - show_rows: the number of initial rows that should be displayed.
    display : Integer -> Text
    display show_rows=10 =
        jcol = this.java_column
        header = jcol.getName []
        data = jcol.getStorage []
        total = jcol.getSize []
        shown = min total show_rows
        rows = Vector.new shown ix->
            [if data.isNa [ix] then "NA" else here.get_item_string data ix]
        rendered = Table.print_table [header] rows
        if total - shown <= 0 then rendered else
            suffix = '\n\u2026 and ' + (total - shown).to_text + ' hidden rows.'
            rendered + suffix

    ## Writes the ASCII-art rendering of this column to the standard output.

       Arguments:
       - show_rows: the number of initial rows that should be displayed.
    print show_rows=10 =
        IO.println (this.display show_rows)

    ## Element-wise equality comparison. Returns a column holding the results
       of comparing each element of this column against `other`.
    == : Any -> Column
    == other =
        here.run_vectorized_op this "==" (== other) other

    ## Element-wise non-equality comparison, computed as the negation of the
       element-wise equality against `other`.
    != : Any -> Column
    != other = (this == other).not

    ## Boolean negation of each element in this column.
    not : Column
    not =
        here.run_vectorized_op this "not" not Nothing

    ## Applies `function` to every item of this column and returns the column
       of results, which is named "Result".
    map function =
        mapped = this.java_column.getStorage [] . map [function]
        Column (Java_Column.new ["Result", mapped].to_array)

    ## Returns a new column with the same elements as `this`, carrying the
       given name.
    rename name = Column (this.java_column.rename [name])

    ## Returns the name of this column.
    name = this.java_column.getName []

    ## Returns the number of elements in this column.
    length = this.java_column . getSize []

    ## Returns the item stored at the given index, or `Nothing` when the
       storage marks that index as missing.
    at index =
        data = this.java_column.getStorage []
        if data.isNa [index] then Nothing else
            data.getItem [index]

    ## Returns a vector containing all the elements in this column.
    to_vector = Vector.new this.length this.at

    ## Returns the Enso type matching the underlying storage of this column,
       falling back to `Any` for unrecognised storage tags.
    storage_type =
        tag = this.java_column.getStorage [] . getType []
        if tag == Storage_Type_String then Text else
            if tag == Storage_Type_Long then Integer else
                if tag == Storage_Type_Double then Decimal else
                    if tag == Storage_Type_Bool then Boolean else
                        Any

    ## Converts this column to a JSON object with the fields `name` and `data`.
       Missing items are encoded as JSON nulls.
    to_json =
        jcol = this.java_column
        col_name = jcol.getName []
        data = jcol.getStorage []
        item_to_json = case this.storage_type of
            Text -> Json.String
            Integer -> Json.Number
            Decimal -> Json.Number
            Boolean -> Json.Boolean
            _ -> to_json
        data_json = Json.Array (here.storage_to_json data item_to_json)
        fields = Map.singleton "name" (Json.String col_name) . insert "data" data_json
        Json.Object fields
# The Java column is built with `Java_Column.fromItems`, which receives the
# name and `items` converted via `to_array`; the result is wrapped in `Column`.
## Creates a new column given a name and a vector of elements.
from_vector name items = Column (Java_Column.fromItems [name, items.to_array])
## PRIVATE

   Runs an element-wise operation over the storage of `column`. When the
   storage reports `java_op_name` as vectorized, the Java implementation is
   invoked with `operand`; otherwise `fallback_method` is mapped over the
   storage. The result is wrapped in a new column named "Result".
run_vectorized_op column java_op_name fallback_method operand =
    data = column.java_column.getStorage []
    vectorized = data.isOpVectorized [java_op_name]
    result = if vectorized then data.runVectorizedOp [java_op_name, operand] else data.map [fallback_method]
    Column (Java_Column.new ["Result", result].to_array)
# Numeric tags identifying the Java storage kind of a column. They are compared
# against the value returned by the storage's `getType` call.
# NOTE(review): the Java sources in this change import the class as
# `org.enso.table.data.column.storage.Storage`; the class path quoted in the
# comments below looks outdated - confirm and update.
## PRIVATE
Keep this in sync with `org.enso.table.data.Storage.Type.LONG`
storage_type_long = 1
## PRIVATE
Keep this in sync with `org.enso.table.data.Storage.Type.DOUBLE`
storage_type_double = 2
## PRIVATE
Keep this in sync with `org.enso.table.data.Storage.Type.STRING`
storage_type_string = 3
## PRIVATE
Keep this in sync with `org.enso.table.data.Storage.Type.BOOL`
storage_type_bool = 4
## PRIVATE

   Converts every item of `storage` to JSON. Missing items become `Json.Null`;
   all other items are passed through `factory`.
storage_to_json storage factory =
    count = storage.size []
    Vector.new count i->
        if storage.isNa [i] then Json.Null else factory (storage.getItem [i])
## PRIVATE

   Returns the textual form of the item at index `ix`. Items of string storage
   are returned unchanged; any other item is converted with `to_text`. Despite
   its name, `column` is used as a storage object (`getType` / `getItem`).
get_item_string column ix =
    tag = column.getType []
    is_text = tag == Storage_Type_String
    if is_text then column.getItem [ix] else
        column.getItem [ix] . to_text

View File

@ -0,0 +1,98 @@
from Base import all
import Table.Io.Csv
import Table.Data.Column
polyglot java import org.enso.table.data.table.Table as Java_Table
## A column-oriented table data structure, wrapping a Java table object.
type Table
    type Table java_table

    ## Renders this table as an ASCII-art table and returns it as text.

       Arguments:
       - show_rows: the number of initial rows that should be displayed.
    display : Integer -> Text
    display show_rows=10 =
        jcols = Vector.Vector (this.java_table.getColumns [])
        headers = jcols.map (_.getName [])
        storages = jcols.map (_.getStorage [])
        total = this.java_table.nrows []
        shown = min total show_rows
        rows = Vector.new shown row_ix->
            storages.map st->
                if st.isNa [row_ix] then "NA" else Column.get_item_string st row_ix
        rendered = here.print_table headers rows
        if total - shown <= 0 then rendered else
            suffix = '\n\u2026 and ' + (total - shown).to_text + ' hidden rows.'
            rendered + suffix

    ## Writes the ASCII-art rendering of this table to the standard output.

       Arguments:
       - show_rows: the number of initial rows that should be displayed.
    print show_rows=10 =
        IO.println (this.display show_rows)

    ## Converts this table to a JSON object whose `columns` field holds the
       JSON form of every column.
    to_json : Json
    to_json =
        columns_json = Json.Array (this.columns.map to_json)
        fields = Map.singleton "columns" columns_json
        Json.Object fields

    ## Returns the column with the given name, or `Nothing` when the Java
       table has no such column.
    at : Text -> Column | Nothing
    at name = case this.java_table.getColumnByName [name] of
        Nothing -> Nothing
        c -> Column.Column c

    ## Keeps only the rows of this table for which `indexes` holds `True`.

       This is useful for filtering the rows by a given predicate.

       > Example
         Select only the rows of `my_table` where the `"Status"` column has
         the value `"Valid"`
             my_table.where (my_table.at "Status" == "Valid")
    where indexes =
        Table (this.java_table.mask [indexes.java_column])

    ## Stores `column` under the given name. An existing column with that
       name is replaced; otherwise a new column is added.
    set name column =
        Table (this.java_table.addOrReplaceColumn [column.rename name . java_column])

    ## Returns the vector of columns contained in this table.
    columns =
        Vector.Vector (this.java_table.getColumns []) . map Column.Column
# Builds a `Table` from a vector of Java column objects: `cols` is converted
# with `to_array` and handed to the `Java_Table` constructor.
## PRIVATE
from_columns cols = Table (Java_Table.new [cols.to_array].to_array)
## Creates a new table from a vector of `[name, items]` pairs.

   Every pair is turned into a column with `Column.from_vector`, using the
   first element as the name and the second as the items.

   > Example
     Create a new table with the given data in two columns:
         Table.new [["foo", [1, 2, 3]], ["bar", [True, False, True]]]
new columns =
    java_columns = columns.map pair->
        built = Column.from_vector (pair.at 0) (pair.at 1)
        built.java_column
    here.from_columns java_columns
## PRIVATE

   Appends spaces to `txt` so that it spans `len` characters, where the
   current width is the number of entries in `txt.characters`.
pad txt len =
    missing = len - txt.characters.length
    txt + (" ".repeat missing)
# Renders `header` and `rows` as an ASCII-art table and returns the text.
# - `content_lengths` holds, per column, the larger of the header's character
#   count and the longest cell's character count in that column.
# - Cells are padded with `here.pad` and joined with ' | '; the header line and
#   every row line are prefixed with a single space.
# - The divider is built from runs of dashes joined with '+'; each run is
#   presumably two characters wider than its column (this assumes the unspaced
#   `l+2` binds tighter than the application of `repeat` - confirm).
# - The result is the header line, the divider and the row lines joined with
#   newlines.
## PRIVATE
print_table header rows =
content_lengths = Vector.new header.length i->
max_row = 0.up_to rows.length . fold 0 a-> j-> max a (rows.at j . at i . characters . length)
max max_row (header.at i . characters . length)
header_line = zip header content_lengths here.pad . join ' | '
divider = content_lengths . map (l -> "-".repeat l+2) . join '+'
row_lines = rows.map r->
x = zip r content_lengths here.pad . join ' | '
" " + x
([" " + header_line, divider] + row_lines).join '\n'

View File

@ -1,5 +1,5 @@
from Base import all
import Table.Table
import Table.Data.Table
polyglot java import org.enso.table.format.csv.Parser

View File

@ -1,5 +1,10 @@
from Base import all
import Table.Io.Csv
import Table.Data.Table
import Table.Data.Column
from Table.Io.Csv export all hiding Parser
export Table.Data.Column
from Table.Data.Table export new

View File

@ -1,95 +0,0 @@
from Base import all
import Table.Io.Csv
# Wraps a Java table object (`java_table`). `display` renders at most
# `show_rows` rows, printing "NA" for missing items and appending a note with
# the number of hidden rows when some are left out. `to_json` emits an object
# with a single `columns` field built by `here.column_to_json`.
## Represents a column-oriented table data structure.
type Table
type Table java_table
## Returns a text containing an ASCII-art table displaying this data.
Arguments:
- show_rows: the number of initial rows that should be displayed.
display : Integer -> Text
display show_rows=10 =
cols = Vector.Vector (this.java_table.getColumns [])
col_names = cols.map (_.getName [])
col_vals = cols.map (_.getStorage [])
num_rows = this.java_table.nrows []
display_rows = min num_rows show_rows
rows = Vector.new display_rows row_num->
col_vals.map col->
if col.isNa [row_num] then "NA" else here.get_item_string col row_num
table = here.print_table col_names rows
if num_rows - display_rows <= 0 then table else
missing = '\n\u2026 and ' + (num_rows - display_rows).to_text + ' hidden rows.'
table + missing
## Converts this table to a JSON structure.
to_json : Json
to_json =
col_jsons = Vector.Vector (this.java_table.getColumns []) . map here.column_to_json
cols_json = Json.Array col_jsons
fields = Map.singleton "columns" cols_json
Json.Object fields
# Numeric tags identifying the Java storage kind of a column. They are compared
# against the value returned by the storage's `getType` call and mirror the
# constants of the Java `Storage.Type` class.
## PRIVATE
Keep this in sync with `org.enso.table.data.Storage.Type.LONG`
storage_type_long = 1
## PRIVATE
Keep this in sync with `org.enso.table.data.Storage.Type.DOUBLE`
storage_type_double = 2
## PRIVATE
Keep this in sync with `org.enso.table.data.Storage.Type.STRING`
storage_type_string = 3
## PRIVATE

   Converts every item of a string storage to JSON. Missing items become
   `Json.Null`; all other items are wrapped in `Json.String`.
string_storage_to_json storage =
    count = storage.size []
    Vector.new count i->
        if storage.isNa [i] then Json.Null else Json.String (storage.getItem [i])
## PRIVATE

   Converts every item of a numeric storage to JSON. Missing items become
   `Json.Null`; all other items are wrapped in `Json.Number`.
numeric_storage_to_json storage =
    count = storage.size []
    Vector.new count i->
        if storage.isNa [i] then Json.Null else Json.Number (storage.getItem [i])
## PRIVATE

   Converts a Java column to a JSON object with the fields `name` and `data`.
   String storages are encoded with `here.string_storage_to_json`; every other
   storage is encoded with `here.numeric_storage_to_json`.
column_to_json col =
    data = col.getStorage []
    tag = data.getType []
    items = if tag == Storage_Type_String then here.string_storage_to_json data else here.numeric_storage_to_json data
    name_json = Json.String (col.getName [])
    fields = Map.singleton "name" name_json . insert "data" (Json.Array items)
    Json.Object fields
## PRIVATE

   Returns the textual form of the item at index `ix`. Items of string storage
   are returned unchanged; any other item is converted with `to_text`. Despite
   its name, `column` is used as a storage object (`getType` / `getItem`).
get_item_string column ix =
    tag = column.getType []
    is_text = tag == Storage_Type_String
    if is_text then column.getItem [ix] else
        column.getItem [ix] . to_text
## PRIVATE

   Appends spaces to `txt` so that it spans `len` characters, where the
   current width is the number of entries in `txt.characters`.
pad txt len =
    missing = len - txt.characters.length
    txt + (" ".repeat missing)
# Renders `header` and `rows` as an ASCII-art table and returns the text.
# - `content_lengths` holds, per column, the larger of the header's character
#   count and the longest cell's character count in that column.
# - Cells are padded with `here.pad` and joined with ' | '; the header line and
#   every row line are prefixed with a single space.
# - The divider is built from runs of dashes joined with '+'; each run is
#   presumably two characters wider than its column (this assumes the unspaced
#   `l+2` binds tighter than the application of `repeat` - confirm).
# - The result is the header line, the divider and the row lines joined with
#   newlines.
## PRIVATE
print_table header rows =
content_lengths = Vector.new header.length i->
max_row = 0.up_to rows.length . fold 0 a-> j-> max a (rows.at j . at i . characters . length)
max max_row (header.at i . characters . length)
header_line = zip header content_lengths here.pad . join ' | '
divider = content_lengths . map (l -> "-".repeat l+2) . join '+'
row_lines = rows.map r->
x = zip r content_lengths here.pad . join ' | '
" " + x
([" " + header_line, divider] + row_lines).join '\n'

View File

@ -1,10 +1,12 @@
package org.enso.interpreter.node.expression.builtin.interop.syntax;
import com.oracle.truffle.api.dsl.Fallback;
import com.oracle.truffle.api.dsl.GenerateUncached;
import com.oracle.truffle.api.dsl.ReportPolymorphism;
import com.oracle.truffle.api.dsl.Specialization;
import com.oracle.truffle.api.dsl.*;
import com.oracle.truffle.api.interop.InteropLibrary;
import com.oracle.truffle.api.library.CachedLibrary;
import com.oracle.truffle.api.nodes.Node;
import org.enso.interpreter.Language;
import org.enso.interpreter.runtime.Context;
import org.enso.interpreter.runtime.callable.atom.Atom;
import org.enso.interpreter.runtime.data.text.Text;
/**
@ -56,6 +58,14 @@ public abstract class HostValueToEnsoNode extends Node {
return Text.create(txt);
}
/**
 * Specialization for host values that the interop library reports as null: such values are
 * replaced by a fresh instance of the builtin returned by {@code getBuiltins().nothing()}.
 *
 * @param o the host value being converted
 * @param nulls the interop library used by the guard to test {@code o} for null
 * @param ctx the current language context, used to reach the builtins
 * @return the atom standing in for the null host value
 */
@Specialization(guards = "nulls.isNull(o)")
Atom doNull(
Object o,
@CachedLibrary(limit = "3") InteropLibrary nulls,
@CachedContext(Language.class) Context ctx) {
return ctx.getBuiltins().nothing().newInstance();
}
@Fallback
Object doOther(Object o) {
return o;

View File

@ -0,0 +1,19 @@
package org.enso.interpreter.node.expression.builtin.io;
import com.oracle.truffle.api.CompilerDirectives;
import com.oracle.truffle.api.nodes.Node;
import org.apache.commons.lang3.SystemUtils;
import org.enso.interpreter.dsl.BuiltinMethod;
import org.enso.interpreter.runtime.data.text.Text;
/**
 * Builtin node returning the path of the user's home directory as Enso text.
 *
 * <p>NOTE(review): the annotation names this method {@code user_home}, while {@code Builtins}
 * registers it under {@code get_user_home} - confirm that the difference is intended.
 */
@BuiltinMethod(
type = "Prim_Io",
name = "user_home",
description = "Get the text path to the user home directory.")
public final class GetUserHomeNode extends Node {
// Read once from the "user.home" system property when the node is created; every execution
// returns this same value.
private final Text home = Text.create(System.getProperty("user.home"));
/**
 * @param _this the receiver of the call; not used
 * @return the text captured from the "user.home" system property
 */
Text execute(Object _this) {
return home;
}
}

View File

@ -126,6 +126,7 @@ public class Builtins {
scope.registerMethod(io, "readln", ReadlnMethodGen.makeFunction(language));
scope.registerMethod(primIo, "get_file", GetFileMethodGen.makeFunction(language));
scope.registerMethod(primIo, "get_cwd", GetCwdMethodGen.makeFunction(language));
scope.registerMethod(primIo, "get_user_home", GetUserHomeMethodGen.makeFunction(language));
scope.registerMethod(runtime, "no_inline", NoInlineMethodGen.makeFunction(language));
scope.registerMethod(runtime, "gc", GCMethodGen.makeFunction(language));

View File

@ -4,13 +4,16 @@ import com.oracle.truffle.api.CompilerDirectives;
import com.oracle.truffle.api.CompilerDirectives.CompilationFinal;
import com.oracle.truffle.api.dsl.Bind;
import com.oracle.truffle.api.dsl.Cached;
import com.oracle.truffle.api.dsl.CachedContext;
import com.oracle.truffle.api.dsl.Specialization;
import com.oracle.truffle.api.interop.*;
import com.oracle.truffle.api.library.CachedLibrary;
import com.oracle.truffle.api.library.ExportLibrary;
import com.oracle.truffle.api.library.ExportMessage;
import com.oracle.truffle.api.nodes.UnexpectedResultException;
import org.enso.interpreter.Language;
import org.enso.interpreter.node.expression.builtin.text.util.ToJavaStringNode;
import org.enso.interpreter.runtime.Context;
import org.enso.interpreter.runtime.callable.UnresolvedSymbol;
import org.enso.interpreter.runtime.callable.function.Function;
import org.enso.interpreter.runtime.data.Array;
@ -168,4 +171,9 @@ public class Atom implements TruffleObject {
return Text.create(this.toString());
}
}
/**
 * Interop {@code isNull} message: an atom is reported as null exactly when its constructor is
 * the one returned by {@code getBuiltins().nothing()} for the current context.
 *
 * @param ctx the current language context, used to reach the builtins
 * @return whether this atom was built by the builtin {@code nothing()} constructor
 */
@ExportMessage
boolean isNull(@CachedContext(Language.class) Context ctx) {
return this.getConstructor() == ctx.getBuiltins().nothing();
}
}

View File

@ -20,6 +20,17 @@ public class Text_Utils {
return string.substring(from, to);
}
/**
 * Drops a prefix of the given string.
 *
 * @param string the string to trim
 * @param from the UTF-16 index at which the result starts
 * @return the part of {@code string} running from index {@code from} to its end
 */
public static String drop_first(String string, int from) {
  final int end = string.length();
  return string.substring(from, end);
}
/**
* Converts a string into an array of UTF-8 bytes.
*
@ -62,8 +73,8 @@ public class Text_Utils {
public static boolean equals(String str1, Object str2) {
if (str2 instanceof String) {
return Normalizer2.getNFDInstance()
.normalize(str1)
.equals(Normalizer2.getNFDInstance().normalize((String) str2));
.normalize(str1)
.equals(Normalizer2.getNFDInstance().normalize((String) str2));
} else {
return false;
}

View File

@ -1,48 +0,0 @@
package org.enso.table.data.column;
import java.util.BitSet;
/** Column storage for floating point numbers, kept as raw {@code long} bit patterns. */
public class DoubleStorage extends Storage {
  private final long[] data;
  private final BitSet isMissing;
  private final int size;

  /**
   * @param data the underlying data, one bit pattern per item
   * @param size the number of items stored
   * @param isMissing a bit set whose bit {@code i} tells whether the value at index {@code i} is
   *     missing
   */
  public DoubleStorage(long[] data, int size, BitSet isMissing) {
    this.size = size;
    this.isMissing = isMissing;
    this.data = data;
  }

  /** {@inheritDoc} */
  @Override
  public long size() {
    return size;
  }

  /** {@inheritDoc} */
  @Override
  public long getType() {
    return Type.DOUBLE;
  }

  /** {@inheritDoc} */
  @Override
  public boolean isNa(long idx) {
    final int position = (int) idx;
    return isMissing.get(position);
  }

  /**
   * @param idx an index
   * @return the data item contained at the given index, decoded from its bit pattern
   */
  public double getItem(long idx) {
    final long bits = data[(int) idx];
    return Double.longBitsToDouble(bits);
  }
}

View File

@ -1,48 +0,0 @@
package org.enso.table.data.column;
import java.util.BitSet;
/** Column storage for 64-bit integers. */
public class LongStorage extends Storage {
  private final long[] data;
  private final BitSet isMissing;
  private final int size;

  /**
   * @param data the underlying data
   * @param size the number of items stored
   * @param isMissing a bit set whose bit {@code i} tells whether the value at index {@code i} is
   *     missing
   */
  public LongStorage(long[] data, int size, BitSet isMissing) {
    this.size = size;
    this.isMissing = isMissing;
    this.data = data;
  }

  /** {@inheritDoc} */
  @Override
  public long size() {
    return size;
  }

  /** {@inheritDoc} */
  @Override
  public long getType() {
    return Type.LONG;
  }

  /** {@inheritDoc} */
  @Override
  public boolean isNa(long idx) {
    final int position = (int) idx;
    return isMissing.get(position);
  }

  /**
   * @param idx an index
   * @return the data item contained at the given index
   */
  public long getItem(long idx) {
    final int position = (int) idx;
    return data[position];
  }
}

View File

@ -1,31 +0,0 @@
package org.enso.table.data.column;
/** Base class for every kind of column storage. */
public abstract class Storage {
  /**
   * The possible storage type tags.
   *
   * <p>Keep in sync with variables in {@code Table.Table}. These variables are copied between Enso
   * and Java code, in order to make them trivially constant on the Enso side, without invoking the
   * polyglot machinery to access them.
   */
  public static final class Type {
    public static final long LONG = 1L;
    public static final long DOUBLE = 2L;
    public static final long STRING = 3L;
  }

  /** @return the type tag of this column's storage. Must be one of {@link Type} */
  public abstract long getType();

  /** @return the number of elements in this column (including NAs) */
  public abstract long size();

  /**
   * Tells whether the value stored at {@code idx} is missing.
   *
   * @param idx the index to check.
   * @return whether or not the value is missing.
   */
  public abstract boolean isNa(long idx);
}

View File

@ -1,42 +0,0 @@
package org.enso.table.data.column;
/** Column storage for strings; a {@code null} entry denotes a missing value. */
public class StringStorage extends Storage {
  private final String[] data;
  private final int size;

  /**
   * @param data the underlying data
   * @param size the number of items stored
   */
  public StringStorage(String[] data, int size) {
    this.size = size;
    this.data = data;
  }

  /** {@inheritDoc} */
  @Override
  public long size() {
    return size;
  }

  /** {@inheritDoc} */
  @Override
  public long getType() {
    return Type.STRING;
  }

  /** {@inheritDoc} */
  @Override
  public boolean isNa(long idx) {
    final String item = data[(int) idx];
    return item == null;
  }

  /**
   * @param idx an index
   * @return the data item contained at the given index.
   */
  public String getItem(long idx) {
    final int position = (int) idx;
    return data[position];
  }
}

View File

@ -0,0 +1,63 @@
package org.enso.table.data.column.builder.object;
import org.enso.table.data.column.storage.BoolStorage;
import org.enso.table.data.column.storage.Storage;
import java.util.BitSet;
/**
 * A builder for boolean columns.
 *
 * <p>Values and missing-markers are tracked in two bit sets indexed by item position.
 */
public class BoolBuilder extends TypedBuilder {
  private final BitSet vals = new BitSet();
  private final BitSet isNa = new BitSet();
  int size = 0;

  /** @return always {@link Storage.Type#BOOL} */
  @Override
  public int getType() {
    return Storage.Type.BOOL;
  }

  /** @return the number of items appended so far */
  @Override
  public int getCurrentSize() {
    return size;
  }

  /**
   * Appends a boolean, or a missing value when {@code o} is {@code null}.
   *
   * @param o a {@link Boolean} or {@code null}
   */
  @Override
  public void append(Object o) {
    if (o == null) {
      isNa.set(size);
    } else if ((Boolean) o) {
      vals.set(size);
    }
    size++;
  }

  /**
   * Copies all appended items into {@code items}; missing items are written as {@code null}.
   *
   * @param items the buffer to dump elements into
   */
  @Override
  public void writeTo(Object[] items) {
    for (int i = 0; i < size; i++) {
      items[i] = isNa.get(i) ? null : Boolean.valueOf(vals.get(i));
    }
  }

  /** Boolean builders cannot be retyped in place, so this is always {@code false}. */
  @Override
  public boolean canRetypeTo(long type) {
    return false;
  }

  /** Never supported, see {@link #canRetypeTo(long)}. */
  @Override
  public TypedBuilder retypeTo(long type) {
    throw new UnsupportedOperationException();
  }

  /** @return a non-negated {@link BoolStorage} over the collected bits */
  @Override
  public Storage seal() {
    return new BoolStorage(vals, isNa, size, false);
  }
}

View File

@ -0,0 +1,19 @@
package org.enso.table.data.column.builder.object;
import org.enso.table.data.column.storage.Storage;
/** Common contract of the builders that assemble column storages item by item. */
public abstract class Builder {
  /** @return the number of appended elements */
  public abstract int getCurrentSize();

  /**
   * Adds one more item at the end of this builder.
   *
   * @param o the item to append
   */
  public abstract void append(Object o);

  /** @return a storage containing all the items appended so far */
  public abstract Storage seal();
}

View File

@ -0,0 +1,124 @@
package org.enso.table.data.column.builder.object;
import org.enso.table.data.column.storage.Storage;
/**
 * A builder performing type inference on the appended elements, choosing the best possible storage.
 *
 * <p>No typed builder is allocated until the first non-null item arrives. Nulls appended before
 * that point are only counted and are back-filled once the element type is known. Whenever an
 * item does not fit the current builder, the builder is retyped to a more general one.
 */
public class InferredBuilder extends Builder {
  private TypedBuilder currentBuilder = null;
  private int currentSize = 0;
  private final int size;

  /**
   * Creates a new instance of this builder, with the given known result size.
   *
   * @param size the result size
   */
  public InferredBuilder(int size) {
    this.size = size;
  }

  /**
   * Appends an item, creating or retyping the underlying builder as needed.
   *
   * @param o the item to append; {@code null} denotes a missing value
   */
  @Override
  public void append(Object o) {
    if (currentBuilder == null) {
      if (o == null) {
        // The element type is still unknown: only count the null, it is back-filled later.
        currentSize++;
        return;
      } else {
        initBuilderFor(o);
      }
    }
    if (o == null) {
      currentBuilder.append(o);
    } else {
      switch (currentBuilder.getType()) {
        case Storage.Type.BOOL:
          if (o instanceof Boolean) {
            currentBuilder.append(o);
          } else {
            retypeAndAppend(o);
          }
          break;
        case Storage.Type.LONG:
          if (o instanceof Long) {
            currentBuilder.append(o);
          } else {
            retypeAndAppend(o);
          }
          break;
        case Storage.Type.DOUBLE:
          if (o instanceof Double) {
            currentBuilder.append(o);
          } else if (o instanceof Long) {
            // Integers are widened so that they fit into a floating point column.
            currentBuilder.append(((Long) o).doubleValue());
          } else {
            retypeAndAppend(o);
          }
          break;
        case Storage.Type.STRING:
          if (o instanceof String) {
            currentBuilder.append(o);
          } else {
            retypeAndAppend(o);
          }
          break;
        case Storage.Type.OBJECT:
          currentBuilder.append(o);
          break;
      }
    }
    currentSize++;
  }

  /**
   * Creates the typed builder matching the runtime class of {@code o} and back-fills it with the
   * nulls counted so far. Any value that is not a Boolean, Double, Long or String (including
   * {@code null}) selects the object builder.
   */
  private void initBuilderFor(Object o) {
    if (o instanceof Boolean) {
      currentBuilder = new BoolBuilder();
    } else if (o instanceof Double) {
      currentBuilder = NumericBuilder.createDoubleBuilder(size);
    } else if (o instanceof Long) {
      currentBuilder = NumericBuilder.createLongBuilder(size);
    } else if (o instanceof String) {
      currentBuilder = new StringBuilder(size);
    } else {
      currentBuilder = new ObjectBuilder(size);
    }
    for (int i = 0; i < currentSize; i++) {
      currentBuilder.append(null);
    }
  }

  /**
   * Switches to a builder able to hold {@code o} and appends it. An in-place retype is preferred
   * when the current builder offers one; otherwise everything is copied into an object builder.
   */
  private void retypeAndAppend(Object o) {
    if (o instanceof Double && currentBuilder.canRetypeTo(Storage.Type.DOUBLE)) {
      currentBuilder = currentBuilder.retypeTo(Storage.Type.DOUBLE);
    } else if (o instanceof String && currentBuilder.canRetypeTo(Storage.Type.STRING)) {
      currentBuilder = currentBuilder.retypeTo(Storage.Type.STRING);
    } else if (o instanceof Long && currentBuilder.canRetypeTo(Storage.Type.LONG)) {
      currentBuilder = currentBuilder.retypeTo(Storage.Type.LONG);
    } else if (o instanceof Boolean && currentBuilder.canRetypeTo(Storage.Type.BOOL)) {
      currentBuilder = currentBuilder.retypeTo(Storage.Type.BOOL);
    } else if (currentBuilder.canRetypeTo(Storage.Type.OBJECT)) {
      currentBuilder = currentBuilder.retypeTo(Storage.Type.OBJECT);
    } else {
      retypeToObject();
    }
    currentBuilder.append(o);
  }

  /** Copies the items of the current builder into a fresh object builder and switches to it. */
  private void retypeToObject() {
    ObjectBuilder objectBuilder = new ObjectBuilder(size);
    currentBuilder.writeTo(objectBuilder.getData());
    objectBuilder.setCurrentSize(currentBuilder.getCurrentSize());
    currentBuilder = objectBuilder;
  }

  /** @return the number of items appended so far, including leading nulls */
  @Override
  public int getCurrentSize() {
    return currentSize;
  }

  /**
   * Seals the builder into a storage.
   *
   * @return a storage containing all the items appended so far
   */
  @Override
  public Storage seal() {
    if (currentBuilder == null) {
      // Only nulls (or nothing at all) were appended, so no type was ever inferred. Fall back to
      // the most general builder, back-filled with the counted nulls, instead of dereferencing
      // a null builder.
      initBuilderFor(null);
    }
    return currentBuilder.seal();
  }
}

View File

@ -0,0 +1,95 @@
package org.enso.table.data.column.builder.object;
import org.enso.table.data.column.storage.DoubleStorage;
import org.enso.table.data.column.storage.LongStorage;
import org.enso.table.data.column.storage.Storage;
import java.util.BitSet;
/**
 * A builder for numeric columns.
 *
 * <p>Integers and floating point numbers share a single {@code long[]} buffer; doubles are kept
 * as their raw bit patterns. The buffer is allocated once with the capacity given at creation.
 */
public class NumericBuilder extends TypedBuilder {
  private boolean isDouble;
  private int currentSize;
  private final BitSet isMissing = new BitSet();
  private final long[] data;

  private NumericBuilder(boolean isDouble, int size) {
    this.data = new long[size];
    this.isDouble = isDouble;
  }

  /**
   * @param size the capacity of the builder
   * @return a builder collecting floating point numbers
   */
  public static NumericBuilder createDoubleBuilder(int size) {
    return new NumericBuilder(true, size);
  }

  /**
   * @param size the capacity of the builder
   * @return a builder collecting 64-bit integers
   */
  public static NumericBuilder createLongBuilder(int size) {
    return new NumericBuilder(false, size);
  }

  /**
   * Copies all appended items, boxed, into {@code items}; missing items are written as
   * {@code null}.
   *
   * @param items the buffer to dump elements into
   */
  @Override
  public void writeTo(Object[] items) {
    for (int i = 0; i < currentSize; i++) {
      if (isMissing.get(i)) {
        items[i] = null;
      } else if (isDouble) {
        items[i] = Double.longBitsToDouble(data[i]);
      } else {
        items[i] = data[i];
      }
    }
  }

  /** Only an integer builder can be retyped, and only to {@link Storage.Type#DOUBLE}. */
  @Override
  public boolean canRetypeTo(long type) {
    return !this.isDouble && type == Storage.Type.DOUBLE;
  }

  /**
   * Turns this integer builder into a floating point builder in place.
   *
   * @param type the target type, must be {@link Storage.Type#DOUBLE}
   * @return this builder, now collecting doubles
   * @throws UnsupportedOperationException for any other retyping request
   */
  @Override
  public TypedBuilder retypeTo(long type) {
    if (!this.isDouble && type == Storage.Type.DOUBLE) {
      this.isDouble = true;
      for (int i = 0; i < currentSize; i++) {
        // Widen the stored integer to a double, then keep that double's bit pattern.
        data[i] = Double.doubleToRawLongBits((double) data[i]);
      }
      return this;
    } else {
      throw new UnsupportedOperationException();
    }
  }

  /** @return {@link Storage.Type#DOUBLE} or {@link Storage.Type#LONG}, as currently collected */
  @Override
  public int getType() {
    return isDouble ? Storage.Type.DOUBLE : Storage.Type.LONG;
  }

  /**
   * Appends a number of the current kind, or a missing value when {@code o} is {@code null}.
   *
   * @param o a {@link Double} for a double builder, a {@link Long} for a long builder, or
   *     {@code null}
   * @throws UnsupportedOperationException when {@code o} does not match the current kind
   */
  @Override
  public void append(Object o) {
    if (o == null) {
      isMissing.set(currentSize++);
    } else if (isDouble && o instanceof Double) {
      data[currentSize++] = Double.doubleToRawLongBits((Double) o);
    } else if (!isDouble && o instanceof Long) {
      data[currentSize++] = (Long) o;
    } else {
      throw new UnsupportedOperationException();
    }
  }

  /** @return the number of items appended so far */
  @Override
  public int getCurrentSize() {
    return currentSize;
  }

  /**
   * Seals the builder into a storage holding exactly the items appended so far.
   *
   * @return a double or long storage, depending on the current kind
   */
  @Override
  public Storage seal() {
    // Report the number of appended items, not the buffer capacity: unused trailing slots hold
    // zeros that are not flagged as missing and must not show up as real values.
    if (isDouble) {
      return new DoubleStorage(data, currentSize, isMissing);
    } else {
      return new LongStorage(data, currentSize, isMissing);
    }
  }
}

View File

@ -0,0 +1,64 @@
package org.enso.table.data.column.builder.object;
import org.enso.table.data.column.storage.ObjectStorage;
import org.enso.table.data.column.storage.Storage;
/** A builder for columns of arbitrary boxed objects, the most general builder type. */
public class ObjectBuilder extends TypedBuilder {
  private final Object[] data;
  private final int size;
  private int currentSize = 0;

  /**
   * Creates a builder backed by a fresh buffer.
   *
   * @param size the capacity of the buffer
   */
  public ObjectBuilder(int size) {
    this(new Object[size], size);
  }

  /**
   * Creates a builder on top of an existing buffer.
   *
   * @param data the buffer to store items in
   * @param size the size passed on to the storage when sealing
   */
  public ObjectBuilder(Object[] data, int size) {
    this.data = data;
    this.size = size;
  }

  /** @return always {@link Storage.Type#OBJECT} */
  @Override
  public int getType() {
    return Storage.Type.OBJECT;
  }

  /** @return the number of items appended so far */
  @Override
  public int getCurrentSize() {
    return currentSize;
  }

  /**
   * Overrides the number of items considered appended.
   *
   * @param currentSize the new item count
   */
  public void setCurrentSize(int currentSize) {
    this.currentSize = currentSize;
  }

  /** @return the backing buffer itself, not a copy */
  public Object[] getData() {
    return data;
  }

  /**
   * Stores {@code o} in the next free slot.
   *
   * @param o the item to append
   */
  @Override
  public void append(Object o) {
    data[currentSize++] = o;
  }

  /** The object builder is already the most general one, so it can never be retyped. */
  @Override
  public boolean canRetypeTo(long type) {
    return false;
  }

  /** Always fails, see {@link #canRetypeTo(long)}. */
  @Override
  public TypedBuilder retypeTo(long type) {
    throw new IllegalStateException("Broken invariant: rewriting the most general type.");
  }

  /** Always fails: there is no more general builder to dump the items into. */
  @Override
  public void writeTo(Object[] items) {
    throw new IllegalStateException("Broken invariant: rewriting the most general type.");
  }

  /** @return an object storage over the backing buffer */
  @Override
  public Storage seal() {
    return new ObjectStorage(data, size);
  }
}

View File

@ -0,0 +1,59 @@
package org.enso.table.data.column.builder.object;
import org.enso.table.data.column.storage.Storage;
import org.enso.table.data.column.storage.StringStorage;
/**
 * A builder for string columns.
 *
 * <p>Items are kept in an {@code Object[]} buffer, which is handed over as-is when the builder is
 * retyped to an object builder.
 */
public class StringBuilder extends TypedBuilder {
  private final Object[] data;
  private final int size;
  private int currentSize = 0;

  /** @param size the capacity of the builder */
  public StringBuilder(int size) {
    this.size = size;
    this.data = new Object[size];
  }

  /** @return always {@link Storage.Type#STRING} */
  @Override
  public int getType() {
    return Storage.Type.STRING;
  }

  /** @return the number of items appended so far */
  @Override
  public int getCurrentSize() {
    return currentSize;
  }

  /**
   * Stores {@code o} in the next free slot.
   *
   * @param o the item to append
   */
  @Override
  public void append(Object o) {
    data[currentSize++] = o;
  }

  /**
   * Copies the appended items into {@code items}.
   *
   * @param items the buffer to dump elements into
   */
  @Override
  public void writeTo(Object[] items) {
    for (int position = 0; position < currentSize; position++) {
      items[position] = data[position];
    }
  }

  /** A string builder can only be generalized to {@link Storage.Type#OBJECT}. */
  @Override
  public boolean canRetypeTo(long type) {
    return type == Storage.Type.OBJECT;
  }

  /**
   * Hands the buffer over to an object builder holding the same items.
   *
   * @param type the target type, must be {@link Storage.Type#OBJECT}
   * @return an object builder sharing this builder's buffer
   * @throws UnsupportedOperationException for any other target type
   */
  @Override
  public TypedBuilder retypeTo(long type) {
    if (type != Storage.Type.OBJECT) {
      throw new UnsupportedOperationException();
    }
    ObjectBuilder generalized = new ObjectBuilder(data, size);
    generalized.setCurrentSize(currentSize);
    return generalized;
  }

  /** @return a string storage over the backing buffer */
  @Override
  public Storage seal() {
    return new StringStorage(data, size);
  }
}

View File

@ -0,0 +1,31 @@
package org.enso.table.data.column.builder.object;
/** A builder bound to one storage type, created for a known result size. */
public abstract class TypedBuilder extends Builder {
  /** @return the current storage type of this builder */
  public abstract int getType();

  /**
   * Tells whether this builder can be efficiently retyped to the given storage type.
   *
   * @param type the storage type enumeration
   * @return whether the column can be retyped
   */
  public abstract boolean canRetypeTo(long type);

  /**
   * Retypes this builder to the given type. May only be called when {@link #canRetypeTo(long)}
   * returns true for that type.
   *
   * @param type the target type
   * @return a retyped builder
   */
  public abstract TypedBuilder retypeTo(long type);

  /**
   * Dumps all the items into the given boxed buffer.
   *
   * @param items the buffer to dump elements into
   */
  public abstract void writeTo(Object[] items);
}

View File

@ -1,8 +1,8 @@
package org.enso.table.data.column.builder;
package org.enso.table.data.column.builder.string;
import org.enso.table.data.column.DoubleStorage;
import org.enso.table.data.column.LongStorage;
import org.enso.table.data.column.Storage;
import org.enso.table.data.column.storage.DoubleStorage;
import org.enso.table.data.column.storage.LongStorage;
import org.enso.table.data.column.storage.Storage;
import java.util.BitSet;

View File

@ -1,6 +1,6 @@
package org.enso.table.data.column.builder;
package org.enso.table.data.column.builder.string;
import org.enso.table.data.column.Storage;
import org.enso.table.data.column.storage.Storage;
/** A builder used by the parser to add items into a column. */
public abstract class StorageBuilder {

View File

@ -1,12 +1,12 @@
package org.enso.table.data.column.builder;
package org.enso.table.data.column.builder.string;
import org.enso.table.data.column.Storage;
import org.enso.table.data.column.StringStorage;
import org.enso.table.data.column.storage.Storage;
import org.enso.table.data.column.storage.StringStorage;
/** A column builder appending all the values passed to it in an unchanged form. */
public class StringStorageBuilder extends StorageBuilder {
private String[] data;
private Object[] data;
private int size;
/**
@ -37,7 +37,7 @@ public class StringStorageBuilder extends StorageBuilder {
private void ensureAppendable() {
if (size >= data.length) {
String[] newData = new String[2 * data.length];
Object[] newData = new Object[2 * data.length];
System.arraycopy(data, 0, newData, 0, data.length);
data = newData;
}

View File

@ -0,0 +1,104 @@
package org.enso.table.data.column.storage;
import java.util.BitSet;
/**
 * A boolean column storage.
 *
 * <p>Values are kept in a bit set together with a {@code negated} flag: the logical value at an
 * index is the stored bit when {@code negated} is false, and its inverse otherwise. This lets the
 * {@code not} operation run without copying the data. Missing entries are tracked in a separate
 * bit set.
 */
public class BoolStorage extends Storage {
  private final BitSet values;
  private final BitSet isMissing;
  private final int size;
  private final boolean negated;

  /**
   * @param values the raw value bits
   * @param isMissing a bit set denoting at index {@code i} whether the value at index {@code i}
   *     is missing
   * @param size the number of items stored
   * @param negated whether the logical values are the inverse of the raw value bits
   */
  public BoolStorage(BitSet values, BitSet isMissing, int size, boolean negated) {
    this.values = values;
    this.isMissing = isMissing;
    this.size = size;
    this.negated = negated;
  }

  /** @return the number of items stored, including missing ones */
  @Override
  public long size() {
    return size;
  }

  /** @return the boolean storage type tag */
  @Override
  public long getType() {
    return Type.BOOL;
  }

  /**
   * @param idx the index to look up
   * @return the logical value at {@code idx}, or null if it is missing
   */
  @Override
  public Object getItemBoxed(int idx) {
    // Go through getItem so that the negation flag is honoured, not just the raw bit.
    return isMissing.get(idx) ? null : getItem(idx);
  }

  /**
   * @param idx an index
   * @return the logical value at the given index, taking the negation flag into account
   */
  public boolean getItem(long idx) {
    return negated != values.get((int) idx);
  }

  /** @return whether the value at {@code idx} is missing */
  @Override
  public boolean isNa(long idx) {
    return isMissing.get((int) idx);
  }

  /** Equality and negation are the vectorized operations of this storage. */
  @Override
  public boolean isOpVectorized(String op) {
    return op.equals(Ops.EQ) || op.equals(Ops.NOT);
  }

  /**
   * @param name the operation to run, one of {@code Ops.EQ} and {@code Ops.NOT}
   * @param operand the argument of the operation; unused by {@code Ops.NOT}
   * @return the storage resulting from the operation
   * @throws UnsupportedOperationException if the operation is not vectorized
   */
  @Override
  public Storage runVectorizedOp(String name, Object operand) {
    if (Ops.EQ.equals(name)) {
      return runVectorizedEq(operand);
    } else if (Ops.NOT.equals(name)) {
      // Negation shares the underlying bit sets and only toggles the flag.
      return new BoolStorage(values, isMissing, size, !negated);
    }
    throw new UnsupportedOperationException();
  }

  /**
   * Compares every item with the operand.
   *
   * <p>Comparing with {@code true} returns this storage unchanged, so missing entries stay
   * missing. Comparing with {@code false} yields false for missing entries. A non-boolean operand
   * equals nothing.
   */
  private BoolStorage runVectorizedEq(Object operand) {
    if (!(operand instanceof Boolean)) {
      return new BoolStorage(new BitSet(), new BitSet(), size, false);
    }
    if ((Boolean) operand) {
      return this;
    }
    BitSet newVals = new BitSet();
    newVals.or(values);
    // An item equals false when its logical value is false: that is a cleared raw bit for a
    // regular storage, but a set raw bit for a negated one. Flip only in the former case.
    if (!negated) {
      newVals.flip(0, size);
    }
    newVals.andNot(isMissing);
    return new BoolStorage(newVals, new BitSet(), size, false);
  }

  /** @return the raw value bits, not adjusted for the negation flag */
  public BitSet getValues() {
    return values;
  }

  /** @return the bit set marking missing entries */
  public BitSet getIsMissing() {
    return isMissing;
  }

  /**
   * Return a new storage, containing only the items marked true in the mask.
   *
   * @param mask the mask to use
   * @param cardinality the number of true values in mask
   * @return a new storage, masked with the given mask
   */
  @Override
  public Storage mask(BitSet mask, int cardinality) {
    BitSet newMissing = new BitSet();
    BitSet newValues = new BitSet();
    int resultIx = 0;
    for (int i = 0; i < size; i++) {
      if (!mask.get(i)) {
        continue;
      }
      if (isMissing.get(i)) {
        newMissing.set(resultIx);
      } else if (values.get(i)) {
        newValues.set(resultIx);
      }
      // Every selected row occupies a slot in the result, including rows whose raw bit is
      // cleared; otherwise later rows would shift into the wrong positions.
      resultIx++;
    }
    // Raw bits were copied as-is, so the negation flag carries over unchanged.
    return new BoolStorage(newValues, newMissing, cardinality, negated);
  }

  /** @return whether logical values are the inverse of the raw value bits */
  public boolean isNegated() {
    return negated;
  }
}

View File

@ -0,0 +1,101 @@
package org.enso.table.data.column.storage;
import java.util.BitSet;
import java.util.function.Function;
/** A column containing floating point numbers. */
public class DoubleStorage extends Storage {
private final long[] data;
private final BitSet isMissing;
private final int size;
private static final long NAN = 0x7ff0000000000000L;
/**
* @param data the underlying data
* @param size the number of items stored
* @param isMissing a bit set denoting at index {@code i} whether or not the value at index {@code
* i} is missing.
*/
public DoubleStorage(long[] data, int size, BitSet isMissing) {
this.data = data;
this.isMissing = isMissing;
this.size = size;
}
/** @inheritDoc */
@Override
public long size() {
return size;
}
/**
* @param idx an index
* @return the data item contained at the given index.
*/
public double getItem(long idx) {
return Double.longBitsToDouble(data[(int) idx]);
}
@Override
public Object getItemBoxed(int idx) {
return isMissing.get(idx) ? null : Double.longBitsToDouble(data[idx]);
}
/** @inheritDoc */
@Override
public long getType() {
return Type.DOUBLE;
}
/** @inheritDoc */
@Override
public boolean isNa(long idx) {
return isMissing.get((int) idx);
}
@Override
public boolean isOpVectorized(String op) {
return op.equals("==");
}
@Override
public Storage runVectorizedOp(String name, Object operand) {
if (name.equals("==")) {
return runVectorizedEq(operand);
}
throw new UnsupportedOperationException();
}
private BoolStorage runVectorizedEq(Object operand) {
BitSet isNa = new BitSet();
BitSet values = new BitSet();
if (operand instanceof Double) {
long seek = Double.doubleToRawLongBits((Double) operand);
if ((seek & NAN) != NAN) {
for (int i = 0; i < size; i++) {
if (data[i] == seek && (data[i] & NAN) != NAN && !isMissing.get(i)) {
values.set(i);
}
}
}
}
return new BoolStorage(values, isNa, size, false);
}
@Override
public DoubleStorage mask(BitSet mask, int cardinality) {
BitSet newMissing = new BitSet();
long[] newData = new long[cardinality];
int resIx = 0;
for (int i = 0; i < size; i++) {
if (mask.get(i)) {
if (isMissing.get(i)) {
newMissing.set(resIx++);
} else {
newData[resIx++] = data[i];
}
}
}
return new DoubleStorage(newData, cardinality, newMissing);
}
}

View File

@ -0,0 +1,97 @@
package org.enso.table.data.column.storage;
import java.util.BitSet;
/** A column storing 64-bit integers. */
public class LongStorage extends Storage {
  private final long[] data;
  private final BitSet isMissing;
  private final int size;

  /**
   * @param data the underlying data
   * @param size the number of items stored
   * @param isMissing a bit set whose bit {@code i} tells whether the value at index {@code i} is
   *     missing
   */
  public LongStorage(long[] data, int size, BitSet isMissing) {
    this.size = size;
    this.isMissing = isMissing;
    this.data = data;
  }

  /** @inheritDoc */
  @Override
  public long size() {
    return size;
  }

  /**
   * @param idx an index
   * @return the item stored at that index
   */
  public long getItem(long idx) {
    int position = (int) idx;
    return data[position];
  }

  /**
   * @param idx the index to look up
   * @return the boxed item at {@code idx}, or null when it is marked missing
   */
  @Override
  public Object getItemBoxed(int idx) {
    return isMissing.get(idx) ? null : data[idx];
  }

  /** @inheritDoc */
  @Override
  public long getType() {
    return Type.LONG;
  }

  /** @inheritDoc */
  @Override
  public boolean isNa(long idx) {
    int position = (int) idx;
    return isMissing.get(position);
  }

  /** Only equality has a vectorized implementation. */
  @Override
  public boolean isOpVectorized(String op) {
    return Ops.EQ.equals(op);
  }

  /**
   * @param name the operation to run; only {@code Ops.EQ} is supported
   * @param operand the value to compare the items with
   * @return the result of the operation
   * @throws UnsupportedOperationException for any other operation
   */
  @Override
  public Storage runVectorizedOp(String name, Object operand) {
    if (!Ops.EQ.equals(name)) {
      throw new UnsupportedOperationException();
    }
    return runVectorizedEq(operand);
  }

  /**
   * Marks every non-missing item equal to the operand. An operand that is not a {@code Long}
   * matches nothing; the result has no missing entries.
   */
  BoolStorage runVectorizedEq(Object operand) {
    BitSet noneMissing = new BitSet();
    BitSet matches = new BitSet();
    if (!(operand instanceof Long)) {
      return new BoolStorage(matches, noneMissing, size, false);
    }
    long wanted = (Long) operand;
    for (int pos = 0; pos < size; pos++) {
      boolean hit = data[pos] == wanted && !isMissing.get(pos);
      if (hit) {
        matches.set(pos);
      }
    }
    return new BoolStorage(matches, noneMissing, size, false);
  }

  /**
   * Builds a storage holding only the items whose mask bit is set, preserving their order.
   *
   * @param mask the mask to use
   * @param cardinality the number of true values in mask
   * @return the masked storage
   */
  @Override
  public LongStorage mask(BitSet mask, int cardinality) {
    long[] kept = new long[cardinality];
    BitSet keptMissing = new BitSet();
    int outIx = 0;
    for (int srcIx = 0; srcIx < size; srcIx++) {
      if (!mask.get(srcIx)) {
        continue;
      }
      if (isMissing.get(srcIx)) {
        keptMissing.set(outIx);
      } else {
        kept[outIx] = data[srcIx];
      }
      outIx++;
    }
    return new LongStorage(kept, cardinality, keptMissing);
  }
}

View File

@ -0,0 +1,80 @@
package org.enso.table.data.column.storage;
import org.enso.table.data.column.builder.object.BoolBuilder;
import org.enso.table.data.column.builder.object.Builder;
import org.enso.table.data.column.builder.object.InferredBuilder;
import java.util.BitSet;
import java.util.function.Function;
/** A column storing arbitrary objects. Missing items are represented by null entries. */
public class ObjectStorage extends Storage {
  private final Object[] data;
  private final int size;

  /**
   * @param data the underlying data
   * @param size the number of items stored
   */
  public ObjectStorage(Object[] data, int size) {
    this.size = size;
    this.data = data;
  }

  /** @inheritDoc */
  @Override
  public long size() {
    return size;
  }

  /**
   * @param idx an index
   * @return the item stored at that index
   */
  public Object getItem(long idx) {
    int position = (int) idx;
    return data[position];
  }

  /**
   * @param idx the index to look up
   * @return the item at {@code idx}; null stands for a missing value
   */
  @Override
  public Object getItemBoxed(int idx) {
    return data[idx];
  }

  /** @inheritDoc */
  @Override
  public long getType() {
    return Type.OBJECT;
  }

  /** @inheritDoc */
  @Override
  public boolean isNa(long idx) {
    int position = (int) idx;
    return null == data[position];
  }

  /** No operation is vectorized for plain object columns. */
  @Override
  public boolean isOpVectorized(String op) {
    return false;
  }

  /**
   * Always fails, as this storage has no vectorized operations.
   *
   * @throws UnsupportedOperationException always
   */
  @Override
  public Storage runVectorizedOp(String name, Object operand) {
    throw new UnsupportedOperationException();
  }

  /**
   * Builds a storage holding only the items whose mask bit is set, preserving their order.
   *
   * @param mask the mask to use
   * @param cardinality the number of true values in mask
   * @return the masked storage
   */
  @Override
  public ObjectStorage mask(BitSet mask, int cardinality) {
    Object[] kept = new Object[cardinality];
    int outIx = 0;
    for (int srcIx = 0; srcIx < size; srcIx++) {
      if (!mask.get(srcIx)) {
        continue;
      }
      kept[outIx] = data[srcIx];
      outIx++;
    }
    return new ObjectStorage(kept, cardinality);
  }

  /** @return the backing array itself, not a copy */
  protected Object[] getData() {
    return data;
  }
}

View File

@ -0,0 +1,99 @@
package org.enso.table.data.column.storage;
import org.enso.table.data.column.builder.object.Builder;
import org.enso.table.data.column.builder.object.InferredBuilder;
import java.util.BitSet;
import java.util.function.Function;
/** An abstract representation of a data column. */
public abstract class Storage {
  /**
   * Enumerating possible storage types.
   *
   * <p>Keep in sync with variables in {@code Table.Table}. These variables are copied between Enso
   * and Java code, in order to make them trivially constant on the Enso side, without invoking the
   * polyglot machinery to access them.
   */
  public static final class Type {
    public static final int LONG = 1;
    public static final int DOUBLE = 2;
    public static final int STRING = 3;
    public static final int BOOL = 4;
    public static final int OBJECT = 5;
  }

  /** A container for the names of vectorizable operations. */
  public static final class Ops {
    public static final String EQ = "==";
    public static final String NOT = "not";
  }

  /** @return the number of elements in this column (including NAs) */
  public abstract long size();

  /** @return the type tag of this column's storage. Must be one of {@link Type} */
  public abstract long getType();

  /**
   * Tells whether the value at the given index is missing.
   *
   * @param idx the index to check
   * @return whether or not the value is missing
   */
  public abstract boolean isNa(long idx);

  /**
   * Gives the boxed representation of an item, with null standing for a missing value.
   *
   * @param idx the index to look up
   * @return the item at position {@code idx}
   */
  public abstract Object getItemBoxed(int idx);

  /**
   * Tells whether this storage has a vectorized version of the given operation.
   *
   * @param name the operation name
   * @return whether a vectorized version is available
   */
  public abstract boolean isOpVectorized(String name);

  /**
   * Runs a vectorized operation on this storage. Can only be used if {@link
   * #isOpVectorized(String)} returns true.
   *
   * @param name the operation to run
   * @param operand an argument to the operation
   * @return the result of running the operation over this storage
   */
  public abstract Storage runVectorizedOp(String name, Object operand);

  /**
   * Produces a new storage containing only the items marked true in the mask.
   *
   * @param mask the mask to use
   * @param cardinality the number of true values in mask
   * @return a new storage, masked with the given mask
   */
  public abstract Storage mask(BitSet mask, int cardinality);

  /**
   * Applies a function to each non-missing element and gathers the results into a new storage.
   * Missing elements are passed through as missing without invoking the function.
   *
   * @param function the function to run
   * @return the storage sealed from the gathered results
   */
  public final Storage map(Function<Object, Object> function) {
    Builder collector = new InferredBuilder((int) size());
    for (int idx = 0; idx < size(); idx++) {
      Object item = getItemBoxed(idx);
      if (item != null) {
        collector.append(function.apply(item));
      } else {
        collector.append(null);
      }
    }
    return collector.seal();
  }
}

View File

@ -0,0 +1,61 @@
package org.enso.table.data.column.storage;
import java.util.BitSet;
/** A column storing strings, built on top of the object storage. */
public class StringStorage extends ObjectStorage {
  /**
   * @param data the underlying data
   * @param size the number of items stored
   */
  public StringStorage(Object[] data, int size) {
    super(data, size);
  }

  /**
   * @param idx an index
   * @return the item stored at that index, cast to a string
   */
  public String getItem(long idx) {
    Object raw = super.getItem(idx);
    return (String) raw;
  }

  /** @inheritDoc */
  @Override
  public long getType() {
    return Type.STRING;
  }

  /** Only equality has a vectorized implementation. */
  @Override
  public boolean isOpVectorized(String op) {
    return op.equals(Ops.EQ);
  }

  /**
   * @param name the operation to run; only {@code Ops.EQ} is supported
   * @param operand the value to compare the items with
   * @return the result of the operation
   * @throws UnsupportedOperationException for any other operation
   */
  @Override
  public Storage runVectorizedOp(String name, Object operand) {
    if (!Ops.EQ.equals(name)) {
      throw new UnsupportedOperationException();
    }
    return runVectorizedEq(operand);
  }

  /**
   * Marks every non-null item that {@code equals} the given value. Null items never match and the
   * result has no missing entries.
   *
   * @param that the value to compare the items with
   * @return a boolean storage of the comparison results
   */
  public BoolStorage runVectorizedEq(Object that) {
    Object[] items = getData();
    int count = (int) size();
    BitSet matches = new BitSet();
    BitSet noneMissing = new BitSet();
    for (int pos = 0; pos < count; pos++) {
      Object item = items[pos];
      if (item != null && item.equals(that)) {
        matches.set(pos);
      }
    }
    return new BoolStorage(matches, noneMissing, count, false);
  }

  /**
   * Masks the items using the object storage implementation and rewraps the resulting data as a
   * string storage.
   *
   * @param mask the mask to use
   * @param cardinality the number of true values in mask
   * @return the masked storage
   */
  @Override
  public StringStorage mask(BitSet mask, int cardinality) {
    Object[] kept = super.mask(mask, cardinality).getData();
    return new StringStorage(kept, cardinality);
  }
}

View File

@ -1,6 +1,10 @@
package org.enso.table.data.table;
import org.enso.table.data.column.Storage;
import org.enso.table.data.column.builder.object.InferredBuilder;
import org.enso.table.data.column.storage.Storage;
import java.util.BitSet;
import java.util.List;
/** A representation of a column. Consists of a column name and the underlying storage. */
public class Column {
@ -27,4 +31,45 @@ public class Column {
public Storage getStorage() {
return storage;
}
/**
 * Reports how many items this column holds, as given by the size of its storage.
 *
 * @return the number of items in this column
 */
public long getSize() {
  Storage backing = getStorage();
  return backing.size();
}
/**
 * Builds a column with the same name whose storage holds only the items marked true in the mask.
 *
 * @param mask the mask to use
 * @param cardinality the number of true values in mask
 * @return a new column, masked with the given mask
 */
public Column mask(BitSet mask, int cardinality) {
  String sameName = name;
  Storage maskedStorage = storage.mask(mask, cardinality);
  return new Column(sameName, maskedStorage);
}
/**
 * Gives the column a different name. This column is left untouched; the returned column reuses
 * the same storage.
 *
 * @param name the new name
 * @return a new column with the given name
 */
public Column rename(String name) {
  Storage sharedStorage = this.storage;
  return new Column(name, sharedStorage);
}
/**
 * Builds a column out of a list of items by appending them, in order, to an {@code
 * InferredBuilder} and sealing the result.
 *
 * @param name the name to use
 * @param items the items contained in the column
 * @return a column with given name and items
 */
public static Column fromItems(String name, List<Object> items) {
  InferredBuilder collector = new InferredBuilder(items.size());
  items.forEach(item -> collector.append(item));
  Storage sealed = collector.seal();
  return new Column(name, sealed);
}
}

View File

@ -1,5 +1,10 @@
package org.enso.table.data.table;
import org.enso.table.data.column.storage.BoolStorage;
import org.enso.table.error.UnexpectedColumnTypeException;
import java.util.BitSet;
/** A representation of a table structure. */
public class Table {
@ -27,4 +32,80 @@ public class Table {
public Column[] getColumns() {
return columns;
}
/**
 * Looks a column up by name, returning the first match in column order.
 *
 * @param name the column name
 * @return the column with the given name, or null if there is none
 */
public Column getColumnByName(String name) {
  for (int i = 0; i < columns.length; i++) {
    Column candidate = columns[i];
    if (candidate.getName().equals(name)) {
      return candidate;
    }
  }
  return null;
}
/**
 * Selects the rows for which the provided column holds a true, non-missing value.
 *
 * @param maskCol the masking column; its storage must be a {@code BoolStorage}
 * @return the result of masking this table with the provided column
 * @throws UnexpectedColumnTypeException if the masking column is not a boolean column
 */
public Table mask(Column maskCol) {
  Object rawStorage = maskCol.getStorage();
  if (!(rawStorage instanceof BoolStorage)) {
    throw new UnexpectedColumnTypeException("Boolean");
  }
  BoolStorage boolStorage = (BoolStorage) rawStorage;

  // Work on a copy so that the bit sets owned by the masking column are not modified.
  BitSet selected = new BitSet();
  selected.or(boolStorage.getValues());
  if (boolStorage.isNegated()) {
    // Turn the raw bits into logical values.
    selected.flip(0, (int) boolStorage.size());
  }
  // Missing entries never select a row.
  selected.andNot(boolStorage.getIsMissing());
  int selectedCount = selected.cardinality();

  Column[] maskedColumns = new Column[columns.length];
  int colIx = 0;
  while (colIx < columns.length) {
    maskedColumns[colIx] = columns[colIx].mask(selected, selectedCount);
    colIx++;
  }
  return new Table(maskedColumns);
}
/**
 * Includes a column in the table: the first existing column with the same name is replaced in
 * place, and if there is none the column is added after the existing ones.
 *
 * @param newColumn the column to include.
 * @return a new table containing the specified column.
 */
public Table addOrReplaceColumn(Column newColumn) {
  for (int pos = 0; pos < columns.length; pos++) {
    boolean sameName = columns[pos].getName().equals(newColumn.getName());
    if (sameName) {
      return replaceColumn(pos, newColumn);
    }
  }
  return addColumn(newColumn);
}
/** Creates a table in which the column at position {@code ix} is swapped for {@code newCol}. */
private Table replaceColumn(int ix, Column newCol) {
  int count = columns.length;
  Column[] updated = new Column[count];
  System.arraycopy(columns, 0, updated, 0, count);
  updated[ix] = newCol;
  return new Table(updated);
}
/** Creates a table holding all the current columns followed by {@code newColumn}. */
private Table addColumn(Column newColumn) {
  int oldCount = columns.length;
  Column[] extended = new Column[oldCount + 1];
  System.arraycopy(columns, 0, extended, 0, oldCount);
  extended[oldCount] = newColumn;
  return new Table(extended);
}
}

View File

@ -0,0 +1,21 @@
package org.enso.table.error;
/** An error thrown when a column of a different type than required is encountered. */
public class UnexpectedColumnTypeException extends RuntimeException {
  private final String expected;

  /**
   * Creates a new instance of this error.
   *
   * @param expected a description of the column type that was expected; it is embedded in the
   *     exception message
   */
  public UnexpectedColumnTypeException(String expected) {
    super(describe(expected));
    this.expected = expected;
  }

  /** Builds the exception message for the given expected type description. */
  private static String describe(String expectedType) {
    return "Unexpected column type. Expected a " + expectedType + " column.";
  }

  /** @return the description of the expected type, as passed to the constructor */
  public String getExpected() {
    return this.expected;
  }
}

View File

@ -2,9 +2,9 @@ package org.enso.table.format.csv;
import com.univocity.parsers.csv.CsvParser;
import com.univocity.parsers.csv.CsvParserSettings;
import org.enso.table.data.column.Storage;
import org.enso.table.data.column.builder.StorageBuilder;
import org.enso.table.data.column.builder.PrimInferredStorageBuilder;
import org.enso.table.data.column.storage.Storage;
import org.enso.table.data.column.builder.string.StorageBuilder;
import org.enso.table.data.column.builder.string.PrimInferredStorageBuilder;
import org.enso.table.data.table.Column;
import org.enso.table.data.table.Table;

View File

@ -2,35 +2,104 @@ from Base import all
from Table import all
import Test
spec = describe "Tables" <|
it "should parse a simple numeric table and convert it to JSON" <|
simple_empty = (Enso_Project.data / "simple_empty.csv") . read_csv
c_1_data = [1, 4, 7, 10]
c_2_data = [2, Nothing, 8, 11]
c_3_data = [Nothing, 6, 9, 12]
type My x y
c_1 = Json.from_pairs [["name", "a"], ["data", c_1_data]]
c_2 = Json.from_pairs [["name", "b"], ["data", c_2_data]]
c_3 = Json.from_pairs [["name", "c"], ["data", c_3_data]]
My.== that = case that of
My x1 y1 -> (this.x + this.y) == (x1 + y1)
_ -> False
expected = Json.from_pairs [["columns", [c_1, c_2, c_3]]]
My.frobnicate = case this of
My x1 y1 -> My y1 x1
simple_empty.to_json.should equal expected
it "should correctly infer types of varied-type columns" <|
varied_column = (Enso_Project.data / "varied_column.csv") . read_csv has_header=False
c_1_data = ["2005-02-25", "2005-02-28", "4", "2005-03-02", Nothing, "2005-03-04", "2005-03-07", "2005-03-08"]
c_2_data = ["2005-02-25", "2005-02-28", "2005-03-01", Nothing, "2005-03-03", "2005-03-04", "2005-03-07", "2005-03-08"]
c_3_data = [1, 2, 3, 4, 5, Nothing, 7, 8]
c_4_data = [1, 2, 3, 4, 5, 6, 7, 8]
c_5_data = [1.0, 2.0, 3.0, 4.0, 5.0, 6.25, 7.0, 8.0]
c_6_data = ['1', '2', '3', '4', '5', '6.25', '7', 'osiem']
spec =
describe "Parsing" <|
it "should parse a simple numeric table" <|
simple_empty = (Enso_Project.data / "simple_empty.csv") . read_csv
c_1_data = [1, 4, 7, 10]
c_2_data = [2, Nothing, 8, 11]
c_3_data = [Nothing, 6, 9, 12]
c_1 = Json.from_pairs [["name", "C0"], ["data", c_1_data]]
c_2 = Json.from_pairs [["name", "C1"], ["data", c_2_data]]
c_3 = Json.from_pairs [["name", "C2"], ["data", c_3_data]]
c_4 = Json.from_pairs [["name", "C3"], ["data", c_4_data]]
c_5 = Json.from_pairs [["name", "C4"], ["data", c_5_data]]
c_6 = Json.from_pairs [["name", "C5"], ["data", c_6_data]]
c_1 = Json.from_pairs [["name", "a"], ["data", c_1_data]]
c_2 = Json.from_pairs [["name", "b"], ["data", c_2_data]]
c_3 = Json.from_pairs [["name", "c"], ["data", c_3_data]]
expected = Json.from_pairs [["columns", [c_1, c_2, c_3, c_4, c_5, c_6]]]
varied_column.to_json.should equal expected
expected = Json.from_pairs [["columns", [c_1, c_2, c_3]]]
simple_empty.to_json.should equal expected
it "should correctly infer types of varied-type columns" <|
varied_column = (Enso_Project.data / "varied_column.csv") . read_csv has_header=False
c_1_data = ["2005-02-25", "2005-02-28", "4", "2005-03-02", Nothing, "2005-03-04", "2005-03-07", "2005-03-08"]
c_2_data = ["2005-02-25", "2005-02-28", "2005-03-01", Nothing, "2005-03-03", "2005-03-04", "2005-03-07", "2005-03-08"]
c_3_data = [1, 2, 3, 4, 5, Nothing, 7, 8]
c_4_data = [1, 2, 3, 4, 5, 6, 7, 8]
c_5_data = [1.0, 2.0, 3.0, 4.0, 5.0, 6.25, 7.0, 8.0]
c_6_data = ['1', '2', '3', '4', '5', '6.25', '7', 'osiem']
c_1 = Json.from_pairs [["name", "C0"], ["data", c_1_data]]
c_2 = Json.from_pairs [["name", "C1"], ["data", c_2_data]]
c_3 = Json.from_pairs [["name", "C2"], ["data", c_3_data]]
c_4 = Json.from_pairs [["name", "C3"], ["data", c_4_data]]
c_5 = Json.from_pairs [["name", "C4"], ["data", c_5_data]]
c_6 = Json.from_pairs [["name", "C5"], ["data", c_6_data]]
expected = Json.from_pairs [["columns", [c_1, c_2, c_3, c_4, c_5, c_6]]]
varied_column.to_json.should equal expected
describe "JSON serialization" <|
it "should serialize all column types to correct JSON" <|
c_1 = [1, 2, 3, Nothing]
c_2 = [1.2, 3.4, 5.6, 7.8]
c_3 = [Nothing, 'foo', 'bar', 'baz']
c_4 = [True, False, True, True]
c_5 = [My 1 2, My True False, My 6.3 6.4, [1, 2, 3]]
t = Table.new [['a', c_1], ['b', c_2], ['c', c_3], ['d', c_4], ['e', c_5]]
j_c_1 = Json.from_pairs [["name", "a"], ["data", c_1]]
j_c_2 = Json.from_pairs [["name", "b"], ["data", c_2]]
j_c_3 = Json.from_pairs [["name", "c"], ["data", c_3]]
j_c_4 = Json.from_pairs [["name", "d"], ["data", c_4]]
j_c_5 = Json.from_pairs [["name", "e"], ["data", c_5]]
expected = Json.from_pairs [["columns", [j_c_1, j_c_2, j_c_3, j_c_4, j_c_5]]]
t.to_json.should_equal expected
describe "Mapping operations" <|
it "should allow mapping a function over a column" <|
c_str = Column.from_vector 'x' ['a', 'b', Nothing, 'b']
c_str.map (+ "x") . to_vector . should_equal ['ax', 'bx', Nothing, 'bx']
c_int = Column.from_vector 'x' [1, 2, 1, 5, 1]
c_int.map (+ 1) . to_vector . should_equal [2, 3, 2, 6, 2]
c_dec = Column.from_vector 'x' [1.9, 2.0, 1.2, 5.6, 1.9]
c_dec.map (+ 1.5) . to_vector . should_equal [3.4, 3.5, 2.7, 7.1, 3.4]
c_bool = Column.from_vector 'x' [True, False, Nothing, True, False]
c_bool.map (_.to_text) . to_vector . should_equal ["True", "False", Nothing, "True", "False"]
c_any = Column.from_vector 'x' [My 1 6, My 6 3, My 2 5, My 3 4, My 200 300]
c_any.map (_.frobnicate) . to_vector . should_equal [My 6 1, My 3 6, My 5 2, My 4 3, My 300 200]
it "should handle vectorized equality and fall back on non-vectorized if needed" <|
c_str = Column.from_vector 'x' ['a', 'b', Nothing, 'b']
(c_str == 'b').to_vector.should_equal [False, True, False, True]
c_int = Column.from_vector 'x' [1, 2, 1, 5, 1]
(c_int == 1).to_vector.should_equal [True, False, True, False, True]
c_dec = Column.from_vector 'x' [1.9, 2.0, 1.2, 5.6, 1.9]
(c_dec == 1.9).to_vector.should_equal [True, False, False, False, True]
c_bool = Column.from_vector 'x' [True, False, Nothing, True, False]
(c_bool == False).to_vector.should_equal [False, True, False, False, True]
c_any = Column.from_vector 'x' [My 1 6, My 6 3, My 2 5, My 3 4, My 200 300]
(c_any == My 7 0).to_vector.should_equal [True, False, True, True, False]
describe "Masking tables" <|
it "should allow selecting table rows based on a boolean column" <|
df = (Enso_Project.data / "simple_empty.csv").read_csv
r = df.where (Column.from_vector 'x' [True, False, False, True])
r.at "a" . to_vector . should_equal [1, 10]
r.at "b" . to_vector . should_equal [2, 11]
r.at "c" . to_vector . should_equal [Nothing, 12]
it "should treat NA values in the mask as false and extend the mask with NAs" <|
df = (Enso_Project.data / "simple_empty.csv").read_csv
r = df.where (Column.from_vector 'x' [Nothing, True, False])
r.at "a" . to_vector . should_equal [4]
r.at "b" . to_vector . should_equal [Nothing]
r.at "c" . to_vector . should_equal [6]

View File

@ -43,3 +43,9 @@ spec = describe "Text" <|
text_1.to_text.should_equal "'foo\\nbar\\r\\tbaz'"
text_2 = '\n\t\a\b\f\r\v\e\''
text_2.to_text.should_equal "'\\n\\t\\a\\b\\f\\r\\v\\e\\''"
it "should allow selecting substrings by characters" <|
txt = kshi + facepalm + accent_1 + accent_2
txt.take_first 2 . should_equal (kshi + facepalm)
txt.drop_first 2 . should_equal (accent_1 + accent_2)
txt.take_last 2 . should_equal (accent_1 + accent_2)
txt.drop_last 2 . should_equal (kshi + facepalm)