mirror of
https://github.com/enso-org/enso.git
synced 2024-12-28 10:55:42 +03:00
Tables: column mapping & masking (#1297)
This commit is contained in:
parent
cf9be4ff29
commit
ab2c5ed097
@ -1,5 +1,6 @@
|
||||
from Base import all
|
||||
polyglot java import java.lang.Math
|
||||
polyglot java import java.lang.Double
|
||||
polyglot java import java.lang.String
|
||||
|
||||
## Computes the inverse of the sine function
|
||||
@ -95,3 +96,8 @@ Number.max that = if this > that then this else that
|
||||
Number.to_json : Json.Number
|
||||
Number.to_json = Json.Number this
|
||||
|
||||
## Attempts to read `text` as a decimal number.

   The conversion is delegated to Java's `Double.parseDouble`. If that call
   panics, the panic is recovered and `Nothing` is returned instead.
Decimal.parse : Text -> Decimal | Nothing
Decimal.parse text =
    parsed = Panic.recover (Double.parseDouble [text])
    parsed.catch (_ -> Nothing)
|
||||
|
@ -176,3 +176,47 @@ Text.to_json = Json.String this
|
||||
Text.repeat : Integer -> Text
|
||||
Text.repeat count =
|
||||
0.up_to count . fold "" acc-> _-> acc + this
|
||||
|
||||
## Returns `this` with its first `count` characters removed.

   Character boundaries are located with a character-instance
   `BreakIterator`. When the iterator reports no boundary `count` steps after
   the start of the text (it yields `-1`), an empty text is returned.
Text.drop_first : Integer -> Text
Text.drop_first count =
    breaker = BreakIterator.getCharacterInstance []
    breaker.setText [this]
    breaker.first []
    cut = breaker.next [count]
    if cut == -1 then '' else
        Text_Utils.drop_first [this, cut]
|
||||
|
||||
## Returns `this` with its last `count` characters removed.

   Character boundaries are located with a character-instance
   `BreakIterator`, walking backwards from the end of the text. When the
   iterator reports no boundary `count` steps before the end (it yields
   `-1`), an empty text is returned.
Text.drop_last : Integer -> Text
Text.drop_last count =
    breaker = BreakIterator.getCharacterInstance []
    breaker.setText [this]
    breaker.last []
    cut = breaker.next [-count]
    if cut == -1 then '' else
        Text_Utils.substring [this, 0, cut]
|
||||
|
||||
## Returns the first `count` characters of `this`.

   Character boundaries are located with a character-instance
   `BreakIterator`. When the iterator reports no boundary `count` steps after
   the start of the text (it yields `-1`), the whole of `this` is returned.
Text.take_first : Integer -> Text
Text.take_first count =
    breaker = BreakIterator.getCharacterInstance []
    breaker.setText [this]
    breaker.first []
    cut = breaker.next [count]
    if cut == -1 then this else
        Text_Utils.substring [this, 0, cut]
|
||||
|
||||
## Returns the last `count` characters of `this`.

   Character boundaries are located with a character-instance
   `BreakIterator`, walking backwards from the end of the text. When the
   iterator reports no boundary `count` steps before the end (it yields
   `-1`), the whole of `this` is returned.
Text.take_last : Integer -> Text
Text.take_last count =
    breaker = BreakIterator.getCharacterInstance []
    breaker.setText [this]
    breaker.last []
    cut = breaker.next [-count]
    if cut == -1 then this else
        Text_Utils.drop_first [this, cut]
|
||||
|
@ -28,8 +28,8 @@ export Base.System.File
|
||||
|
||||
from Base.Data.Any.Extensions export all
|
||||
from Base.Data.List export Nil, Cons
|
||||
from Base.Data.Number.Extensions export all hiding Math, String, Double
|
||||
from Base.Data.Noise export all hiding Noise
|
||||
from Base.Data.Number.Extensions export all hiding Math, String
|
||||
from Base.Data.Pair export Pair
|
||||
from Base.Data.Range export Range
|
||||
from Base.Data.Text.Extensions export Text
|
||||
|
@ -297,3 +297,6 @@ read path = (here.new path).read
|
||||
current_directory : File
|
||||
current_directory = File (Prim_Io.get_cwd)
|
||||
|
||||
## Returns a `File` pointing at the current user's home directory.

   The path is obtained from the `Prim_Io.get_user_home` builtin and wrapped
   using this module's `new`.
home : File
home =
    home_path = Prim_Io.get_user_home
    here.new home_path
|
||||
|
143
distribution/std-lib/Table/src/Data/Column.enso
Normal file
143
distribution/std-lib/Table/src/Data/Column.enso
Normal file
@ -0,0 +1,143 @@
|
||||
from Base import all
|
||||
import Table.Data.Table
|
||||
|
||||
polyglot java import org.enso.table.data.table.Column as Java_Column
|
||||
|
||||
type Column
    type Column java_column

    ## Renders the leading rows of this column as an ASCII-art table.

       Missing values are shown as `NA`. If not every row is shown, a trailing
       line reports how many rows were hidden.

       Arguments:
       - show_rows: the number of initial rows that should be displayed.
    display : Integer -> Text
    display show_rows=10 =
        underlying = this.java_column
        total = underlying.getSize []
        shown = min total show_rows
        hidden = total - shown
        data = underlying.getStorage []
        header = underlying.getName []
        cells = Vector.new shown ix->
            [if data.isNa [ix] then "NA" else here.get_item_string data ix]
        rendered = Table.print_table [header] cells
        if hidden <= 0 then rendered else
            rendered + '\n\u2026 and ' + hidden.to_text + ' hidden rows.'

    ## Writes the result of `display` to the standard output.

       Arguments:
       - show_rows: the number of initial rows that should be displayed.
    print show_rows=10 =
        rendered = this.display show_rows
        IO.println rendered

    ## Compares every element of this column with `other` for equality and
       returns the column of results.

       The storage's vectorized `"=="` operation is used when the storage
       reports it as available; otherwise each element is compared one by one.
    == : Any -> Column
    == other =
        here.run_vectorized_op this "==" (x -> x == other) other

    ## Compares every element of this column with `other` for inequality and
       returns the column of results.

       Defined as the negation of `==`.
    != : Any -> Column
    != other =
        equal = this == other
        equal.not

    ## Negates every boolean element of this column and returns the column of
       results.
    not : Column
    not =
        here.run_vectorized_op this "not" not Nothing

    ## Runs `function` over every item of this column and returns a new column,
       named `"Result"`, holding the outcomes.
    map function =
        mapped = this.java_column.getStorage [] . map [function]
        Column (Java_Column.new ["Result", mapped].to_array)

    ## Returns a column with the same contents as `this`, carrying the given
       name instead.
    rename name =
        renamed = this.java_column.rename [name]
        Column renamed

    ## Returns this column's name.
    name = this.java_column.getName []

    ## Returns the number of items in this column.
    length = this.java_column.getSize []

    ## Returns the item stored at `index`, or `Nothing` when the value at that
       position is missing.
    at index =
        data = this.java_column.getStorage []
        if data.isNa [index] then Nothing else
            data.getItem [index]

    ## Collects all the items of this column into a vector.
    to_vector = Vector.new this.length (ix -> this.at ix)

    ## Returns the Enso type matching the underlying storage of this column:
       `Text`, `Integer`, `Decimal` or `Boolean`, and `Any` for every other
       storage.
    storage_type =
        tag = this.java_column.getStorage [] . getType []
        if tag == Storage_Type_String then Text else
            if tag == Storage_Type_Long then Integer else
                if tag == Storage_Type_Double then Decimal else
                    if tag == Storage_Type_Bool then Boolean else
                        Any

    ## Serializes this column to a JSON object with two fields: `name`, the
       column name, and `data`, the array of items (missing values become
       JSON nulls).
    to_json =
        underlying = this.java_column
        data = underlying.getStorage []
        col_name = underlying.getName []
        item_factory = case this.storage_type of
            Text -> Json.String
            Integer -> Json.Number
            Decimal -> Json.Number
            Boolean -> Json.Boolean
            _ -> to_json
        items_json = Json.Array (here.storage_to_json data item_factory)
        name_json = Json.String col_name
        Json.Object (Map.singleton "name" name_json . insert "data" items_json)
|
||||
|
||||
## Builds a column called `name` out of the elements of the vector `items`.
from_vector name items =
    java_col = Java_Column.fromItems [name, items.to_array]
    Column java_col
|
||||
|
||||
## PRIVATE

   Applies an operation to every element of `column` and wraps the outcome in
   a new column named `"Result"`.

   If the column's storage reports `java_op_name` as vectorized, the storage
   runs it directly with `operand`; otherwise `fallback_method` is mapped over
   the storage.
run_vectorized_op column java_op_name fallback_method operand =
    data = column.java_column.getStorage []
    vectorized = data.isOpVectorized [java_op_name]
    result = if vectorized then data.runVectorizedOp [java_op_name, operand] else
        data.map [fallback_method]
    Column (Java_Column.new ["Result", result].to_array)
|
||||
|
||||
|
||||
## PRIVATE

   Type tag of storages holding 64-bit integers.

   Keep this in sync with
   `org.enso.table.data.column.storage.Storage.Type.LONG`. The value is
   duplicated here so that it is a plain constant on the Enso side.
storage_type_long = 1

## PRIVATE

   Type tag of storages holding floating point numbers.

   Keep this in sync with
   `org.enso.table.data.column.storage.Storage.Type.DOUBLE`.
storage_type_double = 2

## PRIVATE

   Type tag of storages holding strings.

   Keep this in sync with
   `org.enso.table.data.column.storage.Storage.Type.STRING`.
storage_type_string = 3

## PRIVATE

   Type tag of storages holding booleans.

   Keep this in sync with
   `org.enso.table.data.column.storage.Storage.Type.BOOL`.
storage_type_bool = 4
|
||||
|
||||
## PRIVATE

   Converts every item of `storage` to JSON, producing a vector of JSON
   values. Missing items become `Json.Null`; every other item is passed
   through `factory`.
storage_to_json storage factory =
    count = storage.size []
    Vector.new count ix->
        if storage.isNa [ix] then Json.Null else
            item = storage.getItem [ix]
            factory item
|
||||
|
||||
## PRIVATE

   Returns the textual form of the item at index `ix` of the storage passed
   as `column`. Items of string storages are returned as they are; any other
   item is converted with `to_text`.
get_item_string column ix =
    item = column.getItem [ix]
    kind = column.getType []
    if kind == Storage_Type_String then item else item.to_text
|
||||
|
98
distribution/std-lib/Table/src/Data/Table.enso
Normal file
98
distribution/std-lib/Table/src/Data/Table.enso
Normal file
@ -0,0 +1,98 @@
|
||||
from Base import all
|
||||
import Table.Io.Csv
|
||||
import Table.Data.Column
|
||||
|
||||
polyglot java import org.enso.table.data.table.Table as Java_Table
|
||||
|
||||
## A column-oriented table, wrapping a Java table object.
type Table
    type Table java_table

    ## Renders the leading rows of this table as an ASCII-art table.

       Missing values are shown as `NA`. If not every row is shown, a trailing
       line reports how many rows were hidden.

       Arguments:
       - show_rows: the number of initial rows that should be displayed.
    display : Integer -> Text
    display show_rows=10 =
        java_cols = Vector.Vector (this.java_table.getColumns [])
        total = this.java_table.nrows []
        shown = min total show_rows
        hidden = total - shown
        headers = java_cols.map c-> c.getName []
        storages = java_cols.map c-> c.getStorage []
        cells = Vector.new shown row_ix->
            storages.map st->
                if st.isNa [row_ix] then "NA" else Column.get_item_string st row_ix
        rendered = here.print_table headers cells
        if hidden <= 0 then rendered else
            rendered + '\n\u2026 and ' + hidden.to_text + ' hidden rows.'

    ## Writes the result of `display` to the standard output.

       Arguments:
       - show_rows: the number of initial rows that should be displayed.
    print show_rows=10 =
        rendered = this.display show_rows
        IO.println rendered

    ## Serializes this table to a JSON object with a single `columns` field
       holding the array of serialized columns.
    to_json : Json
    to_json =
        serialized = this.columns.map col-> col.to_json
        Json.Object (Map.singleton "columns" (Json.Array serialized))

    ## Looks up the column called `name`, returning `Nothing` when the
       underlying table yields `Nothing` for that name.
    at : Text -> Column | Nothing
    at name =
        found = this.java_table.getColumnByName [name]
        case found of
            Nothing -> Nothing
            java_col -> Column.Column java_col

    ## Keeps only the rows of this table for which the column `indexes` holds
       `True`.

       This is useful for filtering the rows by a given predicate.

       > Example
         Select only the rows of `my_table` where the `"Status"` column has the
         value `"Valid"`
             my_table.where (my_table.at "Status" == "Valid")
    where indexes =
        mask_column = indexes.java_column
        Table (this.java_table.mask [mask_column])

    ## Stores `column` in this table under `name`. A column already present
       under that name is replaced; otherwise a new column is added.
    set name column =
        renamed = column.rename name
        Table (this.java_table.addOrReplaceColumn [renamed.java_column])

    ## Returns the columns of this table as a vector.
    columns =
        java_cols = Vector.Vector (this.java_table.getColumns [])
        java_cols.map Column.Column
|
||||
|
||||
## PRIVATE

   Wraps a vector of Java columns into a `Table`.
from_columns cols =
    java_tbl = Java_Table.new [cols.to_array].to_array
    Table java_tbl
|
||||
|
||||
## Builds a table out of a vector of `[name, items]` pairs, one pair per
   column.

   > Example
     Create a new table with the given data in two columns:
         Table.new [["foo", [1, 2, 3]], ["bar", [True, False, True]]]
new columns =
    java_cols = columns.map spec->
        col_name = spec.at 0
        col_items = spec.at 1
        Column.from_vector col_name col_items . java_column
    here.from_columns java_cols
|
||||
|
||||
## PRIVATE

   Appends spaces to `txt` so that it spans `len` characters, measuring the
   text by its number of characters.
pad txt len =
    missing = len - txt.characters.length
    txt + (" ".repeat missing)
|
||||
|
||||
## PRIVATE

   Lays out `header` (a vector of column titles) and `rows` (a vector of
   vectors of cell texts) as an ASCII-art table.

   Every column is as wide as its longest cell or title. Cells are joined
   with ` | ` and a divider line of dashes and `+` separates the header from
   the rows.
print_table header rows =
    widths = Vector.new header.length col_ix->
        widest_cell = 0.up_to rows.length . fold 0 acc-> row_ix-> max acc (rows.at row_ix . at col_ix . characters . length)
        max widest_cell (header.at col_ix . characters . length)
    title_line = zip header widths here.pad . join ' | '
    separator = widths . map (w -> "-".repeat (w + 2)) . join '+'
    body_lines = rows.map row->
        padded = zip row widths here.pad . join ' | '
        " " + padded
    ([" " + title_line, separator] + body_lines).join '\n'
|
@ -1,5 +1,5 @@
|
||||
from Base import all
|
||||
import Table.Table
|
||||
import Table.Data.Table
|
||||
|
||||
polyglot java import org.enso.table.format.csv.Parser
|
||||
|
||||
|
@ -1,5 +1,10 @@
|
||||
from Base import all
|
||||
|
||||
import Table.Io.Csv
|
||||
import Table.Data.Table
|
||||
import Table.Data.Column
|
||||
|
||||
from Table.Io.Csv export all hiding Parser
|
||||
export Table.Data.Column
|
||||
from Table.Data.Table export new
|
||||
|
||||
|
@ -1,95 +0,0 @@
|
||||
from Base import all
|
||||
import Table.Io.Csv
|
||||
|
||||
## Represents a column-oriented table data structure.
|
||||
type Table
|
||||
type Table java_table
|
||||
|
||||
## Returns a text containing an ASCII-art table displaying this data.
|
||||
|
||||
Arguments:
|
||||
- show_rows: the number of initial rows that should be displayed.
|
||||
display : Integer -> Text
|
||||
display show_rows=10 =
|
||||
cols = Vector.Vector (this.java_table.getColumns [])
|
||||
col_names = cols.map (_.getName [])
|
||||
col_vals = cols.map (_.getStorage [])
|
||||
num_rows = this.java_table.nrows []
|
||||
display_rows = min num_rows show_rows
|
||||
rows = Vector.new display_rows row_num->
|
||||
col_vals.map col->
|
||||
if col.isNa [row_num] then "NA" else here.get_item_string col row_num
|
||||
table = here.print_table col_names rows
|
||||
if num_rows - display_rows <= 0 then table else
|
||||
missing = '\n\u2026 and ' + (num_rows - display_rows).to_text + ' hidden rows.'
|
||||
table + missing
|
||||
|
||||
## Converts this table to a JSON structure.
|
||||
to_json : Json
|
||||
to_json =
|
||||
col_jsons = Vector.Vector (this.java_table.getColumns []) . map here.column_to_json
|
||||
cols_json = Json.Array col_jsons
|
||||
fields = Map.singleton "columns" cols_json
|
||||
Json.Object fields
|
||||
|
||||
## PRIVATE
|
||||
|
||||
Keep this in sync with `org.enso.table.data.Storage.Type.LONG`
|
||||
storage_type_long = 1
|
||||
|
||||
## PRIVATE
|
||||
|
||||
Keep this in sync with `org.enso.table.data.Storage.Type.DOUBLE`
|
||||
storage_type_double = 2
|
||||
|
||||
## PRIVATE
|
||||
|
||||
Keep this in sync with `org.enso.table.data.Storage.Type.STRING`
|
||||
storage_type_string = 3
|
||||
|
||||
## PRIVATE
|
||||
string_storage_to_json storage =
|
||||
Vector.new (storage.size []) ix->
|
||||
if storage.isNa [ix] then Json.Null else
|
||||
Json.String (storage.getItem [ix])
|
||||
|
||||
## PRIVATE
|
||||
numeric_storage_to_json storage =
|
||||
Vector.new (storage.size []) ix->
|
||||
if storage.isNa [ix] then Json.Null else
|
||||
Json.Number (storage.getItem [ix])
|
||||
|
||||
## PRIVATE
|
||||
column_to_json col =
|
||||
name = col.getName []
|
||||
storage = col.getStorage []
|
||||
storage_type = storage.getType []
|
||||
storage_jsons = if storage_type == Storage_Type_String then here.string_storage_to_json storage else
|
||||
here.numeric_storage_to_json storage
|
||||
fields = Map.singleton "name" (Json.String name) . insert "data" (Json.Array storage_jsons)
|
||||
Json.Object fields
|
||||
|
||||
|
||||
## PRIVATE
|
||||
get_item_string column ix =
|
||||
tp = column.getType []
|
||||
if tp == Storage_Type_String then column.getItem [ix] else
|
||||
column.getItem [ix] . to_text
|
||||
|
||||
## PRIVATE
|
||||
pad txt len =
|
||||
true_len = txt.characters.length
|
||||
txt + (" ".repeat (len - true_len))
|
||||
|
||||
## PRIVATE
|
||||
print_table header rows =
|
||||
content_lengths = Vector.new header.length i->
|
||||
max_row = 0.up_to rows.length . fold 0 a-> j-> max a (rows.at j . at i . characters . length)
|
||||
max max_row (header.at i . characters . length)
|
||||
header_line = zip header content_lengths here.pad . join ' | '
|
||||
divider = content_lengths . map (l -> "-".repeat l+2) . join '+'
|
||||
row_lines = rows.map r->
|
||||
x = zip r content_lengths here.pad . join ' | '
|
||||
" " + x
|
||||
([" " + header_line, divider] + row_lines).join '\n'
|
||||
|
@ -1,10 +1,12 @@
|
||||
package org.enso.interpreter.node.expression.builtin.interop.syntax;
|
||||
|
||||
import com.oracle.truffle.api.dsl.Fallback;
|
||||
import com.oracle.truffle.api.dsl.GenerateUncached;
|
||||
import com.oracle.truffle.api.dsl.ReportPolymorphism;
|
||||
import com.oracle.truffle.api.dsl.Specialization;
|
||||
import com.oracle.truffle.api.dsl.*;
|
||||
import com.oracle.truffle.api.interop.InteropLibrary;
|
||||
import com.oracle.truffle.api.library.CachedLibrary;
|
||||
import com.oracle.truffle.api.nodes.Node;
|
||||
import org.enso.interpreter.Language;
|
||||
import org.enso.interpreter.runtime.Context;
|
||||
import org.enso.interpreter.runtime.callable.atom.Atom;
|
||||
import org.enso.interpreter.runtime.data.text.Text;
|
||||
|
||||
/**
|
||||
@ -56,6 +58,14 @@ public abstract class HostValueToEnsoNode extends Node {
|
||||
return Text.create(txt);
|
||||
}
|
||||
|
||||
@Specialization(guards = "nulls.isNull(o)")
|
||||
Atom doNull(
|
||||
Object o,
|
||||
@CachedLibrary(limit = "3") InteropLibrary nulls,
|
||||
@CachedContext(Language.class) Context ctx) {
|
||||
return ctx.getBuiltins().nothing().newInstance();
|
||||
}
|
||||
|
||||
@Fallback
|
||||
Object doOther(Object o) {
|
||||
return o;
|
||||
|
@ -0,0 +1,19 @@
|
||||
package org.enso.interpreter.node.expression.builtin.io;
|
||||
|
||||
import com.oracle.truffle.api.CompilerDirectives;
|
||||
import com.oracle.truffle.api.nodes.Node;
|
||||
import org.apache.commons.lang3.SystemUtils;
|
||||
import org.enso.interpreter.dsl.BuiltinMethod;
|
||||
import org.enso.interpreter.runtime.data.text.Text;
|
||||
|
||||
@BuiltinMethod(
|
||||
type = "Prim_Io",
|
||||
name = "user_home",
|
||||
description = "Get the text path to the user home directory.")
|
||||
public final class GetUserHomeNode extends Node {
|
||||
private final Text home = Text.create(System.getProperty("user.home"));
|
||||
|
||||
Text execute(Object _this) {
|
||||
return home;
|
||||
}
|
||||
}
|
@ -126,6 +126,7 @@ public class Builtins {
|
||||
scope.registerMethod(io, "readln", ReadlnMethodGen.makeFunction(language));
|
||||
scope.registerMethod(primIo, "get_file", GetFileMethodGen.makeFunction(language));
|
||||
scope.registerMethod(primIo, "get_cwd", GetCwdMethodGen.makeFunction(language));
|
||||
scope.registerMethod(primIo, "get_user_home", GetUserHomeMethodGen.makeFunction(language));
|
||||
|
||||
scope.registerMethod(runtime, "no_inline", NoInlineMethodGen.makeFunction(language));
|
||||
scope.registerMethod(runtime, "gc", GCMethodGen.makeFunction(language));
|
||||
|
@ -4,13 +4,16 @@ import com.oracle.truffle.api.CompilerDirectives;
|
||||
import com.oracle.truffle.api.CompilerDirectives.CompilationFinal;
|
||||
import com.oracle.truffle.api.dsl.Bind;
|
||||
import com.oracle.truffle.api.dsl.Cached;
|
||||
import com.oracle.truffle.api.dsl.CachedContext;
|
||||
import com.oracle.truffle.api.dsl.Specialization;
|
||||
import com.oracle.truffle.api.interop.*;
|
||||
import com.oracle.truffle.api.library.CachedLibrary;
|
||||
import com.oracle.truffle.api.library.ExportLibrary;
|
||||
import com.oracle.truffle.api.library.ExportMessage;
|
||||
import com.oracle.truffle.api.nodes.UnexpectedResultException;
|
||||
import org.enso.interpreter.Language;
|
||||
import org.enso.interpreter.node.expression.builtin.text.util.ToJavaStringNode;
|
||||
import org.enso.interpreter.runtime.Context;
|
||||
import org.enso.interpreter.runtime.callable.UnresolvedSymbol;
|
||||
import org.enso.interpreter.runtime.callable.function.Function;
|
||||
import org.enso.interpreter.runtime.data.Array;
|
||||
@ -168,4 +171,9 @@ public class Atom implements TruffleObject {
|
||||
return Text.create(this.toString());
|
||||
}
|
||||
}
|
||||
|
||||
@ExportMessage
|
||||
boolean isNull(@CachedContext(Language.class) Context ctx) {
|
||||
return this.getConstructor() == ctx.getBuiltins().nothing();
|
||||
}
|
||||
}
|
||||
|
@ -20,6 +20,17 @@ public class Text_Utils {
|
||||
return string.substring(from, to);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a new string containing characters starting at the given UTF-16 index.
|
||||
*
|
||||
* @param string the string to trim
|
||||
* @param from number of characters to drop
|
||||
* @return a trimmed string
|
||||
*/
|
||||
public static String drop_first(String string, int from) {
|
||||
return string.substring(from);
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts a string into an array of UTF-8 bytes.
|
||||
*
|
||||
@ -62,8 +73,8 @@ public class Text_Utils {
|
||||
public static boolean equals(String str1, Object str2) {
|
||||
if (str2 instanceof String) {
|
||||
return Normalizer2.getNFDInstance()
|
||||
.normalize(str1)
|
||||
.equals(Normalizer2.getNFDInstance().normalize((String) str2));
|
||||
.normalize(str1)
|
||||
.equals(Normalizer2.getNFDInstance().normalize((String) str2));
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
|
@ -1,48 +0,0 @@
|
||||
package org.enso.table.data.column;
|
||||
|
||||
import java.util.BitSet;
|
||||
|
||||
/** A column containing floating point numbers. */
|
||||
public class DoubleStorage extends Storage {
|
||||
private final long[] data;
|
||||
private final BitSet isMissing;
|
||||
private final int size;
|
||||
|
||||
/**
|
||||
* @param data the underlying data
|
||||
* @param size the number of items stored
|
||||
* @param isMissing a bit set denoting at index {@code i} whether or not the value at index {@code
|
||||
* i} is missing.
|
||||
*/
|
||||
public DoubleStorage(long[] data, int size, BitSet isMissing) {
|
||||
this.data = data;
|
||||
this.isMissing = isMissing;
|
||||
this.size = size;
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
@Override
|
||||
public long size() {
|
||||
return size;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param idx an index
|
||||
* @return the data item contained at the given index.
|
||||
*/
|
||||
public double getItem(long idx) {
|
||||
return Double.longBitsToDouble(data[(int) idx]);
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
@Override
|
||||
public long getType() {
|
||||
return Type.DOUBLE;
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
@Override
|
||||
public boolean isNa(long idx) {
|
||||
return isMissing.get((int) idx);
|
||||
}
|
||||
}
|
@ -1,48 +0,0 @@
|
||||
package org.enso.table.data.column;
|
||||
|
||||
import java.util.BitSet;
|
||||
|
||||
/** A column storing 64-bit integers. */
|
||||
public class LongStorage extends Storage {
|
||||
private final long[] data;
|
||||
private final BitSet isMissing;
|
||||
private final int size;
|
||||
|
||||
/**
|
||||
* @param data the underlying data
|
||||
* @param size the number of items stored
|
||||
* @param isMissing a bit set denoting at index {@code i} whether or not the value at index {@code
|
||||
* i} is missing.
|
||||
*/
|
||||
public LongStorage(long[] data, int size, BitSet isMissing) {
|
||||
this.data = data;
|
||||
this.isMissing = isMissing;
|
||||
this.size = size;
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
@Override
|
||||
public long size() {
|
||||
return size;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param idx an index
|
||||
* @return the data item contained at the given index.
|
||||
*/
|
||||
public long getItem(long idx) {
|
||||
return data[(int) idx];
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
@Override
|
||||
public long getType() {
|
||||
return Type.LONG;
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
@Override
|
||||
public boolean isNa(long idx) {
|
||||
return isMissing.get((int) idx);
|
||||
}
|
||||
}
|
@ -1,31 +0,0 @@
|
||||
package org.enso.table.data.column;
|
||||
|
||||
/** An abstract representation of a data column. */
|
||||
public abstract class Storage {
|
||||
/** @return the number of elements in this column (including NAs) */
|
||||
public abstract long size();
|
||||
|
||||
/** @return the type tag of this column's storage. Must be one of {@link Type} */
|
||||
public abstract long getType();
|
||||
|
||||
/**
|
||||
* Checks whether the value at {@code idx} is missing.
|
||||
*
|
||||
* @param idx the index to check.
|
||||
* @return whether or not the value is missing.
|
||||
*/
|
||||
public abstract boolean isNa(long idx);
|
||||
|
||||
/**
|
||||
* Enumerating possible storage types.
|
||||
*
|
||||
* <p>Keep in sync with variables in {@code Table.Table}. These variables are copied between Enso
|
||||
* and Java code, in order to make them trivially constant on the Enso side, without invoking the
|
||||
* polyglot machinery to access them.
|
||||
*/
|
||||
public static final class Type {
|
||||
public static final long LONG = 1;
|
||||
public static final long DOUBLE = 2;
|
||||
public static final long STRING = 3;
|
||||
}
|
||||
}
|
@ -1,42 +0,0 @@
|
||||
package org.enso.table.data.column;
|
||||
|
||||
/** A column storing strings. */
|
||||
public class StringStorage extends Storage {
|
||||
private final String[] data;
|
||||
private final int size;
|
||||
|
||||
/**
|
||||
* @param data the underlying data
|
||||
* @param size the number of items stored
|
||||
*/
|
||||
public StringStorage(String[] data, int size) {
|
||||
this.data = data;
|
||||
this.size = size;
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
@Override
|
||||
public long size() {
|
||||
return size;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param idx an index
|
||||
* @return the data item contained at the given index.
|
||||
*/
|
||||
public String getItem(long idx) {
|
||||
return data[(int) idx];
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
@Override
|
||||
public long getType() {
|
||||
return Type.STRING;
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
@Override
|
||||
public boolean isNa(long idx) {
|
||||
return data[(int) idx] == null;
|
||||
}
|
||||
}
|
@ -0,0 +1,63 @@
|
||||
package org.enso.table.data.column.builder.object;
|
||||
|
||||
import org.enso.table.data.column.storage.BoolStorage;
|
||||
import org.enso.table.data.column.storage.Storage;
|
||||
|
||||
import java.util.BitSet;
|
||||
|
||||
/**
|
||||
* A builder for boolean columns.
|
||||
*/
|
||||
public class BoolBuilder extends TypedBuilder {
|
||||
private final BitSet vals = new BitSet();
|
||||
private final BitSet isNa = new BitSet();
|
||||
int size = 0;
|
||||
|
||||
@Override
|
||||
public void append(Object o) {
|
||||
if (o == null) {
|
||||
isNa.set(size);
|
||||
} else {
|
||||
if ((Boolean) o) {
|
||||
vals.set(size);
|
||||
}
|
||||
}
|
||||
size++;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Storage seal() {
|
||||
return new BoolStorage(vals, isNa, size, false);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getCurrentSize() {
|
||||
return size;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void writeTo(Object[] items) {
|
||||
for (int i = 0; i < size; i++) {
|
||||
if (isNa.get(i)) {
|
||||
items[i] = null;
|
||||
} else {
|
||||
items[i] = vals.get(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean canRetypeTo(long type) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public TypedBuilder retypeTo(long type) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getType() {
|
||||
return Storage.Type.BOOL;
|
||||
}
|
||||
}
|
@ -0,0 +1,19 @@
|
||||
package org.enso.table.data.column.builder.object;
|
||||
|
||||
import org.enso.table.data.column.storage.Storage;
|
||||
|
||||
/** A builder for creating columns dynamically. */
|
||||
public abstract class Builder {
|
||||
/**
|
||||
* Append a new item to this builder.
|
||||
*
|
||||
* @param o the item to append
|
||||
*/
|
||||
public abstract void append(Object o);
|
||||
|
||||
/** @return the number of appended elements */
|
||||
public abstract int getCurrentSize();
|
||||
|
||||
/** @return a storage containing all the items appended so far */
|
||||
public abstract Storage seal();
|
||||
}
|
@ -0,0 +1,124 @@
|
||||
package org.enso.table.data.column.builder.object;
|
||||
|
||||
import org.enso.table.data.column.storage.Storage;
|
||||
|
||||
/**
|
||||
* A builder performing type inference on the appended elements, choosing the best possible storage.
|
||||
*/
|
||||
public class InferredBuilder extends Builder {
|
||||
private TypedBuilder currentBuilder = null;
|
||||
private int currentSize = 0;
|
||||
private final int size;
|
||||
|
||||
/**
|
||||
* Creates a new instance of this builder, with the given known result size.
|
||||
*
|
||||
* @param size the result size
|
||||
*/
|
||||
public InferredBuilder(int size) {
|
||||
this.size = size;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void append(Object o) {
|
||||
if (currentBuilder == null) {
|
||||
if (o == null) {
|
||||
currentSize++;
|
||||
return;
|
||||
} else {
|
||||
initBuilderFor(o);
|
||||
}
|
||||
}
|
||||
if (o == null) {
|
||||
currentBuilder.append(o);
|
||||
} else {
|
||||
switch (currentBuilder.getType()) {
|
||||
case Storage.Type.BOOL:
|
||||
if (o instanceof Boolean) {
|
||||
currentBuilder.append(o);
|
||||
} else {
|
||||
retypeAndAppend(o);
|
||||
}
|
||||
break;
|
||||
case Storage.Type.LONG:
|
||||
if (o instanceof Long) {
|
||||
currentBuilder.append(o);
|
||||
} else {
|
||||
retypeAndAppend(o);
|
||||
}
|
||||
break;
|
||||
case Storage.Type.DOUBLE:
|
||||
if (o instanceof Double) {
|
||||
currentBuilder.append(o);
|
||||
} else if (o instanceof Long) {
|
||||
currentBuilder.append(((Long) o).doubleValue());
|
||||
} else {
|
||||
retypeAndAppend(o);
|
||||
}
|
||||
break;
|
||||
case Storage.Type.STRING:
|
||||
if (o instanceof String) {
|
||||
currentBuilder.append(o);
|
||||
} else {
|
||||
retypeAndAppend(o);
|
||||
}
|
||||
break;
|
||||
case Storage.Type.OBJECT:
|
||||
currentBuilder.append(o);
|
||||
break;
|
||||
}
|
||||
}
|
||||
currentSize++;
|
||||
}
|
||||
|
||||
private void initBuilderFor(Object o) {
|
||||
if (o instanceof Boolean) {
|
||||
currentBuilder = new BoolBuilder();
|
||||
} else if (o instanceof Double) {
|
||||
currentBuilder = NumericBuilder.createDoubleBuilder(size);
|
||||
} else if (o instanceof Long) {
|
||||
currentBuilder = NumericBuilder.createLongBuilder(size);
|
||||
} else if (o instanceof String) {
|
||||
currentBuilder = new StringBuilder(size);
|
||||
} else {
|
||||
currentBuilder = new ObjectBuilder(size);
|
||||
}
|
||||
for (int i = 0; i < currentSize; i++) {
|
||||
currentBuilder.append(null);
|
||||
}
|
||||
}
|
||||
|
||||
private void retypeAndAppend(Object o) {
|
||||
if (o instanceof Double && currentBuilder.canRetypeTo(Storage.Type.DOUBLE)) {
|
||||
currentBuilder = currentBuilder.retypeTo(Storage.Type.DOUBLE);
|
||||
} else if (o instanceof String && currentBuilder.canRetypeTo(Storage.Type.STRING)) {
|
||||
currentBuilder = currentBuilder.retypeTo(Storage.Type.STRING);
|
||||
} else if (o instanceof Long && currentBuilder.canRetypeTo(Storage.Type.LONG)) {
|
||||
currentBuilder = currentBuilder.retypeTo(Storage.Type.LONG);
|
||||
} else if (o instanceof Boolean && currentBuilder.canRetypeTo(Storage.Type.BOOL)) {
|
||||
currentBuilder = currentBuilder.retypeTo(Storage.Type.BOOL);
|
||||
} else if (currentBuilder.canRetypeTo(Storage.Type.OBJECT)) {
|
||||
currentBuilder = currentBuilder.retypeTo(Storage.Type.OBJECT);
|
||||
} else {
|
||||
retypeToObject();
|
||||
}
|
||||
currentBuilder.append(o);
|
||||
}
|
||||
|
||||
private void retypeToObject() {
|
||||
ObjectBuilder objectBuilder = new ObjectBuilder(size);
|
||||
currentBuilder.writeTo(objectBuilder.getData());
|
||||
objectBuilder.setCurrentSize(currentBuilder.getCurrentSize());
|
||||
currentBuilder = objectBuilder;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getCurrentSize() {
|
||||
return currentSize;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Storage seal() {
|
||||
return currentBuilder.seal();
|
||||
}
|
||||
}
|
@ -0,0 +1,95 @@
|
||||
package org.enso.table.data.column.builder.object;
|
||||
|
||||
import org.enso.table.data.column.storage.DoubleStorage;
|
||||
import org.enso.table.data.column.storage.LongStorage;
|
||||
import org.enso.table.data.column.storage.Storage;
|
||||
|
||||
import java.util.BitSet;
|
||||
|
||||
/**
|
||||
* A builder for numeric columns.
|
||||
*/
|
||||
public class NumericBuilder extends TypedBuilder {
|
||||
private boolean isDouble;
|
||||
private int currentSize;
|
||||
private final int size;
|
||||
private final BitSet isMissing = new BitSet();
|
||||
private final long[] data;
|
||||
|
||||
private NumericBuilder(boolean isDouble, int size) {
|
||||
this.size = size;
|
||||
this.data = new long[size];
|
||||
this.isDouble = isDouble;
|
||||
}
|
||||
|
||||
public static NumericBuilder createDoubleBuilder(int size) {
|
||||
return new NumericBuilder(true, size);
|
||||
}
|
||||
|
||||
public static NumericBuilder createLongBuilder(int size) {
|
||||
return new NumericBuilder(false, size);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void writeTo(Object[] items) {
|
||||
for (int i = 0; i < currentSize; i++) {
|
||||
if (isMissing.get(i)) {
|
||||
items[i] = null;
|
||||
} else if (isDouble) {
|
||||
items[i] = Double.longBitsToDouble(data[i]);
|
||||
} else {
|
||||
items[i] = data[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean canRetypeTo(long type) {
|
||||
return !this.isDouble && type == Storage.Type.DOUBLE;
|
||||
}
|
||||
|
||||
@Override
|
||||
public TypedBuilder retypeTo(long type) {
|
||||
if (!this.isDouble && type == Storage.Type.DOUBLE) {
|
||||
this.isDouble = true;
|
||||
for (int i = 0; i < currentSize; i++) {
|
||||
data[i] = Double.doubleToRawLongBits(data[i]);
|
||||
}
|
||||
return this;
|
||||
} else {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getType() {
|
||||
return isDouble ? Storage.Type.DOUBLE : Storage.Type.LONG;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void append(Object o) {
|
||||
if (o == null) {
|
||||
isMissing.set(currentSize++);
|
||||
} else if (isDouble && o instanceof Double) {
|
||||
data[currentSize++] = Double.doubleToRawLongBits((Double) o);
|
||||
} else if (!isDouble && o instanceof Long) {
|
||||
data[currentSize++] = (Long) o;
|
||||
} else {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getCurrentSize() {
|
||||
return currentSize;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Storage seal() {
|
||||
if (isDouble) {
|
||||
return new DoubleStorage(data, size, isMissing);
|
||||
} else {
|
||||
return new LongStorage(data, size, isMissing);
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,64 @@
|
||||
package org.enso.table.data.column.builder.object;
|
||||
|
||||
import org.enso.table.data.column.storage.ObjectStorage;
|
||||
import org.enso.table.data.column.storage.Storage;
|
||||
|
||||
/** A builder for boxed object columns. */
|
||||
public class ObjectBuilder extends TypedBuilder {
|
||||
private final Object[] data;
|
||||
private final int size;
|
||||
private int currentSize = 0;
|
||||
|
||||
public ObjectBuilder(int size) {
|
||||
this.size = size;
|
||||
this.data = new Object[size];
|
||||
}
|
||||
|
||||
public ObjectBuilder(Object[] data, int size) {
|
||||
this.data = data;
|
||||
this.size = size;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void writeTo(Object[] items) {
|
||||
throw new IllegalStateException("Broken invariant: rewriting the most general type.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean canRetypeTo(long type) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public TypedBuilder retypeTo(long type) {
|
||||
throw new IllegalStateException("Broken invariant: rewriting the most general type.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getType() {
|
||||
return Storage.Type.OBJECT;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void append(Object o) {
|
||||
data[currentSize++] = o;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getCurrentSize() {
|
||||
return currentSize;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Storage seal() {
|
||||
return new ObjectStorage(data, size);
|
||||
}
|
||||
|
||||
public Object[] getData() {
|
||||
return data;
|
||||
}
|
||||
|
||||
public void setCurrentSize(int currentSize) {
|
||||
this.currentSize = currentSize;
|
||||
}
|
||||
}
|
@ -0,0 +1,59 @@
|
||||
package org.enso.table.data.column.builder.object;
|
||||
|
||||
import org.enso.table.data.column.storage.Storage;
|
||||
import org.enso.table.data.column.storage.StringStorage;
|
||||
|
||||
/** A builder for string columns. */
|
||||
public class StringBuilder extends TypedBuilder {
|
||||
private final Object[] data;
|
||||
private final int size;
|
||||
private int currentSize = 0;
|
||||
|
||||
public StringBuilder(int size) {
|
||||
this.data = new Object[size];
|
||||
this.size = size;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void writeTo(Object[] items) {
|
||||
for (int i = 0; i < currentSize; i++) {
|
||||
items[i] = data[i];
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean canRetypeTo(long type) {
|
||||
return type == Storage.Type.OBJECT;
|
||||
}
|
||||
|
||||
@Override
|
||||
public TypedBuilder retypeTo(long type) {
|
||||
if (type == Storage.Type.OBJECT) {
|
||||
ObjectBuilder res = new ObjectBuilder(data, size);
|
||||
res.setCurrentSize(currentSize);
|
||||
return res;
|
||||
} else {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getType() {
|
||||
return Storage.Type.STRING;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void append(Object o) {
|
||||
data[currentSize++] = o;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getCurrentSize() {
|
||||
return currentSize;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Storage seal() {
|
||||
return new StringStorage(data, size);
|
||||
}
|
||||
}
|
@ -0,0 +1,31 @@
|
||||
package org.enso.table.data.column.builder.object;
|
||||
|
||||
/**
 * A builder for the given storage type and known result size.
 *
 * <p>On top of the plain {@code Builder} contract, a typed builder reports its current storage
 * type and can be converted ("retyped") to another storage type.
 */
public abstract class TypedBuilder extends Builder {
  /**
   * Dump all the items into a given boxed buffer.
   *
   * @param items the buffer to dump elements into
   */
  public abstract void writeTo(Object[] items);

  /**
   * Checks if the builder can be efficiently retyped to the given storage type.
   *
   * @param type the storage type enumeration
   * @return whether the column can be retyped
   */
  public abstract boolean canRetypeTo(long type);

  /**
   * Retype this builder to the given type. Can only be called if {@link #canRetypeTo(long)} returns
   * true for the type.
   *
   * @param type the target type
   * @return a retyped builder
   */
  public abstract TypedBuilder retypeTo(long type);

  /** @return the current storage type of this builder */
  public abstract int getType();
}
|
@ -1,8 +1,8 @@
|
||||
package org.enso.table.data.column.builder;
|
||||
package org.enso.table.data.column.builder.string;
|
||||
|
||||
import org.enso.table.data.column.DoubleStorage;
|
||||
import org.enso.table.data.column.LongStorage;
|
||||
import org.enso.table.data.column.Storage;
|
||||
import org.enso.table.data.column.storage.DoubleStorage;
|
||||
import org.enso.table.data.column.storage.LongStorage;
|
||||
import org.enso.table.data.column.storage.Storage;
|
||||
|
||||
import java.util.BitSet;
|
||||
|
@ -1,6 +1,6 @@
|
||||
package org.enso.table.data.column.builder;
|
||||
package org.enso.table.data.column.builder.string;
|
||||
|
||||
import org.enso.table.data.column.Storage;
|
||||
import org.enso.table.data.column.storage.Storage;
|
||||
|
||||
/** A builder used by the parser to add items into a column. */
|
||||
public abstract class StorageBuilder {
|
@ -1,12 +1,12 @@
|
||||
package org.enso.table.data.column.builder;
|
||||
package org.enso.table.data.column.builder.string;
|
||||
|
||||
import org.enso.table.data.column.Storage;
|
||||
import org.enso.table.data.column.StringStorage;
|
||||
import org.enso.table.data.column.storage.Storage;
|
||||
import org.enso.table.data.column.storage.StringStorage;
|
||||
|
||||
/** A column builder appending all the values passed to it in an unchanged form. */
|
||||
public class StringStorageBuilder extends StorageBuilder {
|
||||
|
||||
private String[] data;
|
||||
private Object[] data;
|
||||
private int size;
|
||||
|
||||
/**
|
||||
@ -37,7 +37,7 @@ public class StringStorageBuilder extends StorageBuilder {
|
||||
|
||||
private void ensureAppendable() {
|
||||
if (size >= data.length) {
|
||||
String[] newData = new String[2 * data.length];
|
||||
Object[] newData = new Object[2 * data.length];
|
||||
System.arraycopy(data, 0, newData, 0, data.length);
|
||||
data = newData;
|
||||
}
|
@ -0,0 +1,104 @@
|
||||
package org.enso.table.data.column.storage;
|
||||
|
||||
import java.util.BitSet;
|
||||
|
||||
/**
|
||||
* A boolean column storage.
|
||||
*/
|
||||
public class BoolStorage extends Storage {
|
||||
private final BitSet values;
|
||||
private final BitSet isMissing;
|
||||
private final int size;
|
||||
private final boolean negated;
|
||||
|
||||
public BoolStorage(BitSet values, BitSet isMissing, int size, boolean negated) {
|
||||
this.values = values;
|
||||
this.isMissing = isMissing;
|
||||
this.size = size;
|
||||
this.negated = negated;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long size() {
|
||||
return size;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getType() {
|
||||
return Type.BOOL;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object getItemBoxed(int idx) {
|
||||
return isMissing.get(idx) ? null : values.get(idx);
|
||||
}
|
||||
|
||||
public boolean getItem(long idx) {
|
||||
return negated != values.get((int) idx);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isNa(long idx) {
|
||||
return isMissing.get((int) idx);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isOpVectorized(String op) {
|
||||
return op.equals(Ops.EQ) || op.equals(Ops.NOT);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Storage runVectorizedOp(String name, Object operand) {
|
||||
if (Ops.EQ.equals(name)) {
|
||||
return runVectorizedEq(operand);
|
||||
} else if (Ops.NOT.equals(name)) {
|
||||
return new BoolStorage(values, isMissing, size, !negated);
|
||||
}
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
private BoolStorage runVectorizedEq(Object operand) {
|
||||
if (operand instanceof Boolean) {
|
||||
if ((Boolean) operand) {
|
||||
return this;
|
||||
} else {
|
||||
BitSet newVals = new BitSet();
|
||||
newVals.or(values);
|
||||
newVals.flip(0, size);
|
||||
newVals.andNot(isMissing);
|
||||
return new BoolStorage(newVals, new BitSet(), size, false);
|
||||
}
|
||||
} else {
|
||||
return new BoolStorage(new BitSet(), new BitSet(), size, false);
|
||||
}
|
||||
}
|
||||
|
||||
public BitSet getValues() {
|
||||
return values;
|
||||
}
|
||||
|
||||
public BitSet getIsMissing() {
|
||||
return isMissing;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Storage mask(BitSet mask, int cardinality) {
|
||||
BitSet newMissing = new BitSet();
|
||||
BitSet newValues = new BitSet();
|
||||
int resultIx = 0;
|
||||
for (int i = 0; i < size; i++) {
|
||||
if (mask.get(i)) {
|
||||
if (isMissing.get(i)) {
|
||||
newMissing.set(resultIx++);
|
||||
} else if (values.get(i)) {
|
||||
newValues.set(resultIx++);
|
||||
}
|
||||
}
|
||||
}
|
||||
return new BoolStorage(newValues, newMissing, cardinality, negated);
|
||||
}
|
||||
|
||||
public boolean isNegated() {
|
||||
return negated;
|
||||
}
|
||||
}
|
@ -0,0 +1,101 @@
|
||||
package org.enso.table.data.column.storage;
|
||||
|
||||
import java.util.BitSet;
|
||||
import java.util.function.Function;
|
||||
|
||||
/** A column containing floating point numbers. */
|
||||
public class DoubleStorage extends Storage {
|
||||
private final long[] data;
|
||||
private final BitSet isMissing;
|
||||
private final int size;
|
||||
private static final long NAN = 0x7ff0000000000000L;
|
||||
|
||||
/**
|
||||
* @param data the underlying data
|
||||
* @param size the number of items stored
|
||||
* @param isMissing a bit set denoting at index {@code i} whether or not the value at index {@code
|
||||
* i} is missing.
|
||||
*/
|
||||
public DoubleStorage(long[] data, int size, BitSet isMissing) {
|
||||
this.data = data;
|
||||
this.isMissing = isMissing;
|
||||
this.size = size;
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
@Override
|
||||
public long size() {
|
||||
return size;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param idx an index
|
||||
* @return the data item contained at the given index.
|
||||
*/
|
||||
public double getItem(long idx) {
|
||||
return Double.longBitsToDouble(data[(int) idx]);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object getItemBoxed(int idx) {
|
||||
return isMissing.get(idx) ? null : Double.longBitsToDouble(data[idx]);
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
@Override
|
||||
public long getType() {
|
||||
return Type.DOUBLE;
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
@Override
|
||||
public boolean isNa(long idx) {
|
||||
return isMissing.get((int) idx);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isOpVectorized(String op) {
|
||||
return op.equals("==");
|
||||
}
|
||||
|
||||
@Override
|
||||
public Storage runVectorizedOp(String name, Object operand) {
|
||||
if (name.equals("==")) {
|
||||
return runVectorizedEq(operand);
|
||||
}
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
private BoolStorage runVectorizedEq(Object operand) {
|
||||
BitSet isNa = new BitSet();
|
||||
BitSet values = new BitSet();
|
||||
if (operand instanceof Double) {
|
||||
long seek = Double.doubleToRawLongBits((Double) operand);
|
||||
if ((seek & NAN) != NAN) {
|
||||
for (int i = 0; i < size; i++) {
|
||||
if (data[i] == seek && (data[i] & NAN) != NAN && !isMissing.get(i)) {
|
||||
values.set(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return new BoolStorage(values, isNa, size, false);
|
||||
}
|
||||
|
||||
@Override
|
||||
public DoubleStorage mask(BitSet mask, int cardinality) {
|
||||
BitSet newMissing = new BitSet();
|
||||
long[] newData = new long[cardinality];
|
||||
int resIx = 0;
|
||||
for (int i = 0; i < size; i++) {
|
||||
if (mask.get(i)) {
|
||||
if (isMissing.get(i)) {
|
||||
newMissing.set(resIx++);
|
||||
} else {
|
||||
newData[resIx++] = data[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
return new DoubleStorage(newData, cardinality, newMissing);
|
||||
}
|
||||
}
|
@ -0,0 +1,97 @@
|
||||
package org.enso.table.data.column.storage;
|
||||
|
||||
import java.util.BitSet;
|
||||
|
||||
/** A column storing 64-bit integers. */
|
||||
public class LongStorage extends Storage {
|
||||
private final long[] data;
|
||||
private final BitSet isMissing;
|
||||
private final int size;
|
||||
|
||||
/**
|
||||
* @param data the underlying data
|
||||
* @param size the number of items stored
|
||||
* @param isMissing a bit set denoting at index {@code i} whether or not the value at index {@code
|
||||
* i} is missing.
|
||||
*/
|
||||
public LongStorage(long[] data, int size, BitSet isMissing) {
|
||||
this.data = data;
|
||||
this.isMissing = isMissing;
|
||||
this.size = size;
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
@Override
|
||||
public long size() {
|
||||
return size;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param idx an index
|
||||
* @return the data item contained at the given index.
|
||||
*/
|
||||
public long getItem(long idx) {
|
||||
return data[(int) idx];
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object getItemBoxed(int idx) {
|
||||
return isMissing.get(idx) ? null : data[idx];
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
@Override
|
||||
public long getType() {
|
||||
return Type.LONG;
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
@Override
|
||||
public boolean isNa(long idx) {
|
||||
return isMissing.get((int) idx);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isOpVectorized(String op) {
|
||||
return Ops.EQ.equals(op);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Storage runVectorizedOp(String name, Object operand) {
|
||||
if (Ops.EQ.equals(name)) {
|
||||
return runVectorizedEq(operand);
|
||||
}
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
BoolStorage runVectorizedEq(Object operand) {
|
||||
BitSet isNa = new BitSet();
|
||||
BitSet values = new BitSet();
|
||||
if (operand instanceof Long) {
|
||||
long seek = (Long) operand;
|
||||
for (int i = 0; i < size; i++) {
|
||||
if (data[i] == seek && !isMissing.get(i)) {
|
||||
values.set(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
return new BoolStorage(values, isNa, size, false);
|
||||
}
|
||||
|
||||
@Override
|
||||
public LongStorage mask(BitSet mask, int cardinality) {
|
||||
BitSet newMissing = new BitSet();
|
||||
long[] newData = new long[cardinality];
|
||||
int resIx = 0;
|
||||
for (int i = 0; i < size; i++) {
|
||||
if (mask.get(i)) {
|
||||
if (isMissing.get(i)) {
|
||||
newMissing.set(resIx++);
|
||||
} else {
|
||||
newData[resIx++] = data[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
return new LongStorage(newData, cardinality, newMissing);
|
||||
}
|
||||
}
|
@ -0,0 +1,80 @@
|
||||
package org.enso.table.data.column.storage;
|
||||
|
||||
import org.enso.table.data.column.builder.object.BoolBuilder;
|
||||
import org.enso.table.data.column.builder.object.Builder;
|
||||
import org.enso.table.data.column.builder.object.InferredBuilder;
|
||||
|
||||
import java.util.BitSet;
|
||||
import java.util.function.Function;
|
||||
|
||||
/** A column storing arbitrary objects. */
|
||||
public class ObjectStorage extends Storage {
|
||||
private final Object[] data;
|
||||
private final int size;
|
||||
|
||||
/**
|
||||
* @param data the underlying data
|
||||
* @param size the number of items stored
|
||||
*/
|
||||
public ObjectStorage(Object[] data, int size) {
|
||||
this.data = data;
|
||||
this.size = size;
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
@Override
|
||||
public long size() {
|
||||
return size;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param idx an index
|
||||
* @return the data item contained at the given index.
|
||||
*/
|
||||
public Object getItem(long idx) {
|
||||
return data[(int) idx];
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object getItemBoxed(int idx) {
|
||||
return data[idx];
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
@Override
|
||||
public long getType() {
|
||||
return Type.OBJECT;
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
@Override
|
||||
public boolean isNa(long idx) {
|
||||
return data[(int) idx] == null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isOpVectorized(String op) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Storage runVectorizedOp(String name, Object operand) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public ObjectStorage mask(BitSet mask, int cardinality) {
|
||||
Object[] newData = new Object[cardinality];
|
||||
int resIx = 0;
|
||||
for (int i = 0; i < size; i++) {
|
||||
if (mask.get(i)) {
|
||||
newData[resIx++] = data[i];
|
||||
}
|
||||
}
|
||||
return new ObjectStorage(newData, cardinality);
|
||||
}
|
||||
|
||||
protected Object[] getData() {
|
||||
return data;
|
||||
}
|
||||
}
|
@ -0,0 +1,99 @@
|
||||
package org.enso.table.data.column.storage;
|
||||
|
||||
import org.enso.table.data.column.builder.object.Builder;
|
||||
import org.enso.table.data.column.builder.object.InferredBuilder;
|
||||
|
||||
import java.util.BitSet;
|
||||
import java.util.function.Function;
|
||||
|
||||
/** An abstract representation of a data column. */
|
||||
public abstract class Storage {
|
||||
/** @return the number of elements in this column (including NAs) */
|
||||
public abstract long size();
|
||||
|
||||
/** @return the type tag of this column's storage. Must be one of {@link Type} */
|
||||
public abstract long getType();
|
||||
|
||||
/**
|
||||
* Checks whether the value at {@code idx} is missing.
|
||||
*
|
||||
* @param idx the index to check.
|
||||
* @return whether or not the value is missing.
|
||||
*/
|
||||
public abstract boolean isNa(long idx);
|
||||
|
||||
/**
|
||||
* Returns a boxed representation of an item. Missing values are denoted with null.
|
||||
*
|
||||
* @param idx the index to look up
|
||||
* @return the item at position {@code idx}
|
||||
*/
|
||||
public abstract Object getItemBoxed(int idx);
|
||||
|
||||
/**
|
||||
* Enumerating possible storage types.
|
||||
*
|
||||
* <p>Keep in sync with variables in {@code Table.Table}. These variables are copied between Enso
|
||||
* and Java code, in order to make them trivially constant on the Enso side, without invoking the
|
||||
* polyglot machinery to access them.
|
||||
*/
|
||||
public static final class Type {
|
||||
public static final int LONG = 1;
|
||||
public static final int DOUBLE = 2;
|
||||
public static final int STRING = 3;
|
||||
public static final int BOOL = 4;
|
||||
public static final int OBJECT = 5;
|
||||
}
|
||||
|
||||
/** A container for names of vectorizable operation. */
|
||||
public static final class Ops {
|
||||
public static final String EQ = "==";
|
||||
public static final String NOT = "not";
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks whether a vectorized version of operation exists for this storage.
|
||||
*
|
||||
* @param name the operation name
|
||||
* @return whether a vectorized version is available
|
||||
*/
|
||||
public abstract boolean isOpVectorized(String name);
|
||||
|
||||
/**
|
||||
* Runs a vectorized operation on this storage. Can only be used if {@link
|
||||
* #isOpVectorized(String)} returns true.
|
||||
*
|
||||
* @param name the operation to run
|
||||
* @param operand an argument to the operation
|
||||
* @return the result of running operation over this storage
|
||||
*/
|
||||
public abstract Storage runVectorizedOp(String name, Object operand);
|
||||
|
||||
/**
|
||||
* Return a new storage, containing only the items marked true in the mask.
|
||||
*
|
||||
* @param mask the mask to use
|
||||
* @param cardinality the number of true values in mask
|
||||
* @return a new storage, masked with the given mask
|
||||
*/
|
||||
public abstract Storage mask(BitSet mask, int cardinality);
|
||||
|
||||
/**
|
||||
* Runs a function on each non-missing element in this storage and gathers the results.
|
||||
*
|
||||
* @param function the function to run.
|
||||
* @return the result of running the function on all non-missing elements.
|
||||
*/
|
||||
public final Storage map(Function<Object, Object> function) {
|
||||
Builder builder = new InferredBuilder((int) size());
|
||||
for (int i = 0; i < size(); i++) {
|
||||
Object it = getItemBoxed(i);
|
||||
if (it == null) {
|
||||
builder.append(null);
|
||||
} else {
|
||||
builder.append(function.apply(it));
|
||||
}
|
||||
}
|
||||
return builder.seal();
|
||||
}
|
||||
}
|
@ -0,0 +1,61 @@
|
||||
package org.enso.table.data.column.storage;
|
||||
|
||||
import java.util.BitSet;
|
||||
|
||||
/** A column storing strings. */
|
||||
public class StringStorage extends ObjectStorage {
|
||||
|
||||
/**
|
||||
* @param data the underlying data
|
||||
* @param size the number of items stored
|
||||
*/
|
||||
public StringStorage(Object[] data, int size) {
|
||||
super(data, size);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param idx an index
|
||||
* @return the data item contained at the given index.
|
||||
*/
|
||||
public String getItem(long idx) {
|
||||
return (String) super.getItem(idx);
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
@Override
|
||||
public long getType() {
|
||||
return Type.STRING;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isOpVectorized(String op) {
|
||||
return op.equals("==");
|
||||
}
|
||||
|
||||
@Override
|
||||
public Storage runVectorizedOp(String name, Object operand) {
|
||||
if (Ops.EQ.equals(name)) {
|
||||
return runVectorizedEq(operand);
|
||||
}
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
public BoolStorage runVectorizedEq(Object that) {
|
||||
Object[] data = getData();
|
||||
int size = (int) size();
|
||||
BitSet values = new BitSet();
|
||||
BitSet missing = new BitSet();
|
||||
for (int i = 0; i < size; i++) {
|
||||
if (!(data[i] == null) && data[i].equals(that)) {
|
||||
values.set(i);
|
||||
}
|
||||
}
|
||||
return new BoolStorage(values, missing, size, false);
|
||||
}
|
||||
|
||||
@Override
|
||||
public StringStorage mask(BitSet mask, int cardinality) {
|
||||
ObjectStorage storage = super.mask(mask, cardinality);
|
||||
return new StringStorage(storage.getData(), cardinality);
|
||||
}
|
||||
}
|
@ -1,6 +1,10 @@
|
||||
package org.enso.table.data.table;
|
||||
|
||||
import org.enso.table.data.column.Storage;
|
||||
import org.enso.table.data.column.builder.object.InferredBuilder;
|
||||
import org.enso.table.data.column.storage.Storage;
|
||||
|
||||
import java.util.BitSet;
|
||||
import java.util.List;
|
||||
|
||||
/** A representation of a column. Consists of a column name and the underlying storage. */
|
||||
public class Column {
|
||||
@ -27,4 +31,45 @@ public class Column {
|
||||
public Storage getStorage() {
|
||||
return storage;
|
||||
}
|
||||
|
||||
/** @return the number of items in this column. */
|
||||
public long getSize() {
|
||||
return getStorage().size();
|
||||
}
|
||||
|
||||
/**
|
||||
* Return a new column, containing only the items marked true in the mask.
|
||||
*
|
||||
* @param mask the mask to use
|
||||
* @param cardinality the number of true values in mask
|
||||
* @return a new column, masked with the given mask
|
||||
*/
|
||||
public Column mask(BitSet mask, int cardinality) {
|
||||
return new Column(name, storage.mask(mask, cardinality));
|
||||
}
|
||||
|
||||
/**
|
||||
* Renames the column.
|
||||
*
|
||||
* @param name the new name
|
||||
* @return a new column with the given name
|
||||
*/
|
||||
public Column rename(String name) {
|
||||
return new Column(name, storage);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a new column with given name and elements.
|
||||
*
|
||||
* @param name the name to use
|
||||
* @param items the items contained in the column
|
||||
* @return a column with given name and items
|
||||
*/
|
||||
public static Column fromItems(String name, List<Object> items) {
|
||||
InferredBuilder builder = new InferredBuilder(items.size());
|
||||
for (Object item : items) {
|
||||
builder.append(item);
|
||||
}
|
||||
return new Column(name, builder.seal());
|
||||
}
|
||||
}
|
||||
|
@ -1,5 +1,10 @@
|
||||
package org.enso.table.data.table;
|
||||
|
||||
import org.enso.table.data.column.storage.BoolStorage;
|
||||
import org.enso.table.error.UnexpectedColumnTypeException;
|
||||
|
||||
import java.util.BitSet;
|
||||
|
||||
/** A representation of a table structure. */
|
||||
public class Table {
|
||||
|
||||
@ -27,4 +32,80 @@ public class Table {
|
||||
public Column[] getColumns() {
|
||||
return columns;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a column with the given name, or null if it doesn't exist.
|
||||
*
|
||||
* @param name the column name
|
||||
* @return a column with the given name
|
||||
*/
|
||||
public Column getColumnByName(String name) {
|
||||
for (Column column : columns) {
|
||||
if (column.getName().equals(name)) {
|
||||
return column;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a table resulting from selecting only the rows corresponding to true entries in the
|
||||
* provided column.
|
||||
*
|
||||
* @param maskCol the masking column
|
||||
* @return the result of masking this table with the provided column
|
||||
*/
|
||||
public Table mask(Column maskCol) {
|
||||
if (!(maskCol.getStorage() instanceof BoolStorage)) {
|
||||
throw new UnexpectedColumnTypeException("Boolean");
|
||||
}
|
||||
BoolStorage storage = (BoolStorage) maskCol.getStorage();
|
||||
BitSet mask = new BitSet();
|
||||
mask.or(storage.getValues());
|
||||
if (storage.isNegated()) {
|
||||
mask.flip(0, (int) storage.size());
|
||||
}
|
||||
mask.andNot(storage.getIsMissing());
|
||||
int cardinality = mask.cardinality();
|
||||
Column[] newColumns = new Column[columns.length];
|
||||
for (int i = 0; i < columns.length; i++) {
|
||||
newColumns[i] = columns[i].mask(mask, cardinality);
|
||||
}
|
||||
return new Table(newColumns);
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds a column, or replaces it, by name.
|
||||
*
|
||||
* @param newColumn the column to include.
|
||||
* @return a new table containing the specified column.
|
||||
*/
|
||||
public Table addOrReplaceColumn(Column newColumn) {
|
||||
int existingIx = -1;
|
||||
for (int i = 0; i < columns.length; i++) {
|
||||
if (columns[i].getName().equals(newColumn.getName())) {
|
||||
existingIx = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (existingIx == -1) {
|
||||
return addColumn(newColumn);
|
||||
} else {
|
||||
return replaceColumn(existingIx, newColumn);
|
||||
}
|
||||
}
|
||||
|
||||
private Table replaceColumn(int ix, Column newCol) {
|
||||
Column[] newCols = new Column[columns.length];
|
||||
System.arraycopy(columns, 0, newCols, 0, columns.length);
|
||||
newCols[ix] = newCol;
|
||||
return new Table(newCols);
|
||||
}
|
||||
|
||||
private Table addColumn(Column newColumn) {
|
||||
Column[] newCols = new Column[columns.length + 1];
|
||||
System.arraycopy(columns, 0, newCols, 0, columns.length);
|
||||
newCols[columns.length] = newColumn;
|
||||
return new Table(newCols);
|
||||
}
|
||||
}
|
||||
|
@ -0,0 +1,21 @@
|
||||
package org.enso.table.error;
|
||||
|
||||
/** An error thrown when a column of one type is supplied where another type is required. */
public class UnexpectedColumnTypeException extends RuntimeException {
  private final String expected;

  /**
   * Creates a new instance of this error.
   *
   * @param expected the expected type description
   */
  public UnexpectedColumnTypeException(String expected) {
    super(describe(expected));
    this.expected = expected;
  }

  /** Builds the exception message for the given expected type description. */
  private static String describe(String expected) {
    return "Unexpected column type. Expected a " + expected + " column.";
  }

  /** @return the expected type descriptor */
  public String getExpected() {
    return this.expected;
  }
}
|
@ -2,9 +2,9 @@ package org.enso.table.format.csv;
|
||||
|
||||
import com.univocity.parsers.csv.CsvParser;
|
||||
import com.univocity.parsers.csv.CsvParserSettings;
|
||||
import org.enso.table.data.column.Storage;
|
||||
import org.enso.table.data.column.builder.StorageBuilder;
|
||||
import org.enso.table.data.column.builder.PrimInferredStorageBuilder;
|
||||
import org.enso.table.data.column.storage.Storage;
|
||||
import org.enso.table.data.column.builder.string.StorageBuilder;
|
||||
import org.enso.table.data.column.builder.string.PrimInferredStorageBuilder;
|
||||
import org.enso.table.data.table.Column;
|
||||
import org.enso.table.data.table.Table;
|
||||
|
||||
|
@ -2,35 +2,104 @@ from Base import all
|
||||
from Table import all
|
||||
import Test
|
||||
|
||||
spec = describe "Tables" <|
|
||||
it "should parse a simple numeric table and convert it to JSON" <|
|
||||
simple_empty = (Enso_Project.data / "simple_empty.csv") . read_csv
|
||||
c_1_data = [1, 4, 7, 10]
|
||||
c_2_data = [2, Nothing, 8, 11]
|
||||
c_3_data = [Nothing, 6, 9, 12]
|
||||
type My x y
|
||||
|
||||
c_1 = Json.from_pairs [["name", "a"], ["data", c_1_data]]
|
||||
c_2 = Json.from_pairs [["name", "b"], ["data", c_2_data]]
|
||||
c_3 = Json.from_pairs [["name", "c"], ["data", c_3_data]]
|
||||
My.== that = case that of
|
||||
My x1 y1 -> (this.x + this.y) == (x1 + y1)
|
||||
_ -> False
|
||||
|
||||
expected = Json.from_pairs [["columns", [c_1, c_2, c_3]]]
|
||||
My.frobnicate = case this of
|
||||
My x1 y1 -> My y1 x1
|
||||
|
||||
simple_empty.to_json.should equal expected
|
||||
it "should correctly infer types of varied-type columns" <|
|
||||
varied_column = (Enso_Project.data / "varied_column.csv") . read_csv has_header=False
|
||||
c_1_data = ["2005-02-25", "2005-02-28", "4", "2005-03-02", Nothing, "2005-03-04", "2005-03-07", "2005-03-08"]
|
||||
c_2_data = ["2005-02-25", "2005-02-28", "2005-03-01", Nothing, "2005-03-03", "2005-03-04", "2005-03-07", "2005-03-08"]
|
||||
c_3_data = [1, 2, 3, 4, 5, Nothing, 7, 8]
|
||||
c_4_data = [1, 2, 3, 4, 5, 6, 7, 8]
|
||||
c_5_data = [1.0, 2.0, 3.0, 4.0, 5.0, 6.25, 7.0, 8.0]
|
||||
c_6_data = ['1', '2', '3', '4', '5', '6.25', '7', 'osiem']
|
||||
spec =
|
||||
describe "Parsing" <|
|
||||
it "should parse a simple numeric table" <|
|
||||
simple_empty = (Enso_Project.data / "simple_empty.csv") . read_csv
|
||||
c_1_data = [1, 4, 7, 10]
|
||||
c_2_data = [2, Nothing, 8, 11]
|
||||
c_3_data = [Nothing, 6, 9, 12]
|
||||
|
||||
c_1 = Json.from_pairs [["name", "C0"], ["data", c_1_data]]
|
||||
c_2 = Json.from_pairs [["name", "C1"], ["data", c_2_data]]
|
||||
c_3 = Json.from_pairs [["name", "C2"], ["data", c_3_data]]
|
||||
c_4 = Json.from_pairs [["name", "C3"], ["data", c_4_data]]
|
||||
c_5 = Json.from_pairs [["name", "C4"], ["data", c_5_data]]
|
||||
c_6 = Json.from_pairs [["name", "C5"], ["data", c_6_data]]
|
||||
c_1 = Json.from_pairs [["name", "a"], ["data", c_1_data]]
|
||||
c_2 = Json.from_pairs [["name", "b"], ["data", c_2_data]]
|
||||
c_3 = Json.from_pairs [["name", "c"], ["data", c_3_data]]
|
||||
|
||||
expected = Json.from_pairs [["columns", [c_1, c_2, c_3, c_4, c_5, c_6]]]
|
||||
varied_column.to_json.should equal expected
|
||||
expected = Json.from_pairs [["columns", [c_1, c_2, c_3]]]
|
||||
|
||||
simple_empty.to_json.should equal expected
|
||||
it "should correctly infer types of varied-type columns" <|
|
||||
varied_column = (Enso_Project.data / "varied_column.csv") . read_csv has_header=False
|
||||
c_1_data = ["2005-02-25", "2005-02-28", "4", "2005-03-02", Nothing, "2005-03-04", "2005-03-07", "2005-03-08"]
|
||||
c_2_data = ["2005-02-25", "2005-02-28", "2005-03-01", Nothing, "2005-03-03", "2005-03-04", "2005-03-07", "2005-03-08"]
|
||||
c_3_data = [1, 2, 3, 4, 5, Nothing, 7, 8]
|
||||
c_4_data = [1, 2, 3, 4, 5, 6, 7, 8]
|
||||
c_5_data = [1.0, 2.0, 3.0, 4.0, 5.0, 6.25, 7.0, 8.0]
|
||||
c_6_data = ['1', '2', '3', '4', '5', '6.25', '7', 'osiem']
|
||||
|
||||
c_1 = Json.from_pairs [["name", "C0"], ["data", c_1_data]]
|
||||
c_2 = Json.from_pairs [["name", "C1"], ["data", c_2_data]]
|
||||
c_3 = Json.from_pairs [["name", "C2"], ["data", c_3_data]]
|
||||
c_4 = Json.from_pairs [["name", "C3"], ["data", c_4_data]]
|
||||
c_5 = Json.from_pairs [["name", "C4"], ["data", c_5_data]]
|
||||
c_6 = Json.from_pairs [["name", "C5"], ["data", c_6_data]]
|
||||
|
||||
expected = Json.from_pairs [["columns", [c_1, c_2, c_3, c_4, c_5, c_6]]]
|
||||
varied_column.to_json.should equal expected
|
||||
|
||||
describe "JSON serialization" <|
|
||||
it "should serialize all column types to correct JSON" <|
|
||||
c_1 = [1, 2, 3, Nothing]
|
||||
c_2 = [1.2, 3.4, 5.6, 7.8]
|
||||
c_3 = [Nothing, 'foo', 'bar', 'baz']
|
||||
c_4 = [True, False, True, True]
|
||||
c_5 = [My 1 2, My True False, My 6.3 6.4, [1, 2, 3]]
|
||||
|
||||
t = Table.new [['a', c_1], ['b', c_2], ['c', c_3], ['d', c_4], ['e', c_5]]
|
||||
|
||||
j_c_1 = Json.from_pairs [["name", "a"], ["data", c_1]]
|
||||
j_c_2 = Json.from_pairs [["name", "b"], ["data", c_2]]
|
||||
j_c_3 = Json.from_pairs [["name", "c"], ["data", c_3]]
|
||||
j_c_4 = Json.from_pairs [["name", "d"], ["data", c_4]]
|
||||
j_c_5 = Json.from_pairs [["name", "e"], ["data", c_5]]
|
||||
|
||||
expected = Json.from_pairs [["columns", [j_c_1, j_c_2, j_c_3, j_c_4, j_c_5]]]
|
||||
|
||||
t.to_json.should_equal expected
|
||||
|
||||
describe "Mapping operations" <|
|
||||
it "should allow mapping a function over a column" <|
|
||||
c_str = Column.from_vector 'x' ['a', 'b', Nothing, 'b']
|
||||
c_str.map (+ "x") . to_vector . should_equal ['ax', 'bx', Nothing, 'bx']
|
||||
c_int = Column.from_vector 'x' [1, 2, 1, 5, 1]
|
||||
c_int.map (+ 1) . to_vector . should_equal [2, 3, 2, 6, 2]
|
||||
c_dec = Column.from_vector 'x' [1.9, 2.0, 1.2, 5.6, 1.9]
|
||||
c_dec.map (+ 1.5) . to_vector . should_equal [3.4, 3.5, 2.7, 7.1, 3.4]
|
||||
c_bool = Column.from_vector 'x' [True, False, Nothing, True, False]
|
||||
c_bool.map (_.to_text) . to_vector . should_equal ["True", "False", Nothing, "True", "False"]
|
||||
c_any = Column.from_vector 'x' [My 1 6, My 6 3, My 2 5, My 3 4, My 200 300]
|
||||
c_any.map (_.frobnicate) . to_vector . should_equal [My 6 1, My 3 6, My 5 2, My 4 3, My 300 200]
|
||||
|
||||
it "should handle vectorized equality and fall back on non-vectorized if needed" <|
|
||||
c_str = Column.from_vector 'x' ['a', 'b', Nothing, 'b']
|
||||
(c_str == 'b').to_vector.should_equal [False, True, False, True]
|
||||
c_int = Column.from_vector 'x' [1, 2, 1, 5, 1]
|
||||
(c_int == 1).to_vector.should_equal [True, False, True, False, True]
|
||||
c_dec = Column.from_vector 'x' [1.9, 2.0, 1.2, 5.6, 1.9]
|
||||
(c_dec == 1.9).to_vector.should_equal [True, False, False, False, True]
|
||||
c_bool = Column.from_vector 'x' [True, False, Nothing, True, False]
|
||||
(c_bool == False).to_vector.should_equal [False, True, False, False, True]
|
||||
c_any = Column.from_vector 'x' [My 1 6, My 6 3, My 2 5, My 3 4, My 200 300]
|
||||
(c_any == My 7 0).to_vector.should_equal [True, False, True, True, False]
|
||||
|
||||
describe "Masking tables" <|
|
||||
it "should allow selecting table rows based on a boolean column" <|
|
||||
df = (Enso_Project.data / "simple_empty.csv").read_csv
|
||||
r = df.where (Column.from_vector 'x' [True, False, False, True])
|
||||
r.at "a" . to_vector . should_equal [1, 10]
|
||||
r.at "b" . to_vector . should_equal [2, 11]
|
||||
r.at "c" . to_vector . should_equal [Nothing, 12]
|
||||
it "should treat NA values in the mask as false and extend the mask with NAs" <|
|
||||
df = (Enso_Project.data / "simple_empty.csv").read_csv
|
||||
r = df.where (Column.from_vector 'x' [Nothing, True, False])
|
||||
r.at "a" . to_vector . should_equal [4]
|
||||
r.at "b" . to_vector . should_equal [Nothing]
|
||||
r.at "c" . to_vector . should_equal [6]
|
||||
|
@ -43,3 +43,9 @@ spec = describe "Text" <|
|
||||
text_1.to_text.should_equal "'foo\\nbar\\r\\tbaz'"
|
||||
text_2 = '\n\t\a\b\f\r\v\e\''
|
||||
text_2.to_text.should_equal "'\\n\\t\\a\\b\\f\\r\\v\\e\\''"
|
||||
it "should allow selecting substrings by characters" <|
|
||||
txt = kshi + facepalm + accent_1 + accent_2
|
||||
txt.take_first 2 . should_equal (kshi + facepalm)
|
||||
txt.drop_first 2 . should_equal (accent_1 + accent_2)
|
||||
txt.take_last 2 . should_equal (accent_1 + accent_2)
|
||||
txt.drop_last 2 . should_equal (kshi + facepalm)
|
||||
|
Loading…
Reference in New Issue
Block a user