Create a Table Row Type and expose as a Vector on In-Memory Table with .rows property (#3827)

Implements https://www.pivotaltracker.com/story/show/182307026
This commit is contained in:
Radosław Waśko 2022-10-26 13:21:33 +02:00 committed by GitHub
parent 46441ca7a8
commit bb29833da5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
14 changed files with 317 additions and 32 deletions

View File

@ -219,6 +219,7 @@
- [Replaced `Table.drop_missing_columns` with
`Table.remove_columns Column_Selector.Blank_Columns` by adding the new column
selector variant.][3812]
- [Implemented `Table.rows` giving access to a vector of rows.][3827]
[debug-shortcuts]:
https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug
@ -351,6 +352,7 @@
[3805]: https://github.com/enso-org/enso/pull/3805
[3812]: https://github.com/enso-org/enso/pull/3812
[3823]: https://github.com/enso-org/enso/pull/3823
[3827]: https://github.com/enso-org/enso/pull/3827
#### Enso Compiler

View File

@ -0,0 +1,34 @@
from Standard.Base import Any, Array, Integer
## A helper type that builds an array out of a length and a callback supplying
   its elements.

   It makes it possible to expose some non-standard underlying storage as an
   array without copying it. The created proxy is read only and behaves like
   any `Array` returned from polyglot calls to other languages. Its most
   common use is to be handed to `Vector.from_polyglot_array`, producing a
   vector backed by that custom storage.
@Builtin_Type
type Array_Proxy
    ## ADVANCED
       UNSTABLE
       Creates a new `Array_Proxy` from a length and a callback.

       Arguments:
       - length: The length of the array to create.
       - at: A function which returns each element of the array.

       > Example
         Create a Vector of 10 million elements without allocating any storage.

             Vector.from_polyglot_array (Array_Proxy.new 10000000 (i -> i))
    new : Integer -> (Integer -> Any) -> Array
    new length at = @Builtin_Method "Array_Proxy.new"

    ## ADVANCED
       UNSTABLE
       Creates a new `Array_Proxy` from an object providing `length` and `at`
       methods.

       Arguments:
       - proxy: The object to wrap. Its `length` is read once, when the proxy
         is created, and its `at` method is used as the element callback.
    from_proxy_object : Any -> Array
    from_proxy_object proxy =
        size = proxy.length
        Array_Proxy.new size proxy.at

View File

@ -499,6 +499,18 @@ type Table
columns : Vector Column
columns self = self.internal_columns . map self.make_column
## Returns a vector of the rows contained in this table.

   In the database backend, the table is first materialized to an in-memory
   table and the rows of that table are returned.

   Arguments:
   - max_rows: The maximum number of rows to return. It is mainly meant for
     the Database backend, to limit how many rows are downloaded. In the
     in-memory backend it is only kept for API compatibility.
rows : Integer -> Vector Row
rows self max_rows=1000 =
    in_memory = self.read max_rows=max_rows
    in_memory.rows
## UNSTABLE
Sets the index of this table, using the column with the provided name.

View File

@ -0,0 +1,21 @@
from Standard.Base import all
from Standard.Base.Data.Array_Proxy import Array_Proxy
from project.Data.Table import Table
## A single row of an in-memory Table, identified by the table it belongs to
   and its position within that table.
type Row
    ## PRIVATE
    Value (table:Table) (index:Integer)

    ## Gets the number of columns in the table.
    length : Integer
    length self =
        all_columns = self.table.columns
        all_columns.length

    ## Gets the value of the specified column.

       Arguments:
       - column: The index or the name of the column to read this row's value
         from.
    at : (Integer | Text) -> Any
    at self column =
        selected = self.table.at column
        selected.at self.index

    ## Gets the row as a Vector.

       The vector is created through an `Array_Proxy` over this row, so it
       uses this row's `length` and `at` to provide its elements.
    to_vector : Vector
    to_vector self =
        proxy = Array_Proxy.from_proxy_object self
        Vector.from_polyglot_array proxy

View File

@ -1,6 +1,7 @@
from Standard.Base import all
import Standard.Base.Error.Common as Errors
from Standard.Base.Error.Problem_Behavior import Report_Warning
from Standard.Base.Data.Array_Proxy import Array_Proxy
import Standard.Base.Data.Index_Sub_Range
import Standard.Base.Data.Ordering.Comparator
import Standard.Base.Data.Text.Case
@ -28,6 +29,7 @@ from project.Data.Column import get_item_string
from project.Data.Column_Type_Selection import Column_Type_Selection, Auto
from project.Delimited.Delimited_Format import Delimited
from project.Internal.Filter_Condition_Helpers import make_filter_column
from project.Internal.Rows_View import Rows_View
from project.Errors import Missing_Input_Columns, Column_Indexes_Out_Of_Range, Duplicate_Type_Selector, No_Index_Set_Error, No_Such_Column_Error, No_Such_Column_Error_Data, No_Input_Columns_Selected, No_Output_Columns, Invalid_Value_Type
import Standard.Visualization
@ -929,6 +931,22 @@ type Table
columns : Vector
columns self = Vector.from_polyglot_array self.java_table.getColumns . map Column.Column_Data
## Returns a vector of rows contained in this table.

   The vector is created through an `Array_Proxy` over a `Rows_View` of the
   table, so each element is a `Row` produced by that view.

   Arguments:
   - max_rows: The maximum amount of rows to return. If `Nothing` (the
     default), all rows of the table are returned; if an `Integer`, only the
     first `max_rows` rows are included. The argument mirrors the Database
     backend, where it limits how many rows are downloaded.
rows : Integer | Nothing -> Vector Row
rows self max_rows=Nothing =
    # Only cut the table down when a limit was actually requested.
    table = case max_rows of
        Nothing -> self
        _ : Integer -> self.slice 0 max_rows
    proxy = Rows_View.Value table
    Vector.from_polyglot_array (Array_Proxy.from_proxy_object proxy)
## Sets the index of this table, using the column with the provided name.
Arguments:

View File

@ -0,0 +1,16 @@
from Standard.Base import all
from project.Data.Table import Table
from project.Data.Row import Row
## A proxy object over a table that provides `length` and `at`, where `at`
   yields a `Row` for the given row index.
type Rows_View
    ## PRIVATE
    Value (table:Table)

    ## Gets the number of rows in the table.
    length : Integer
    length self =
        underlying = self.table
        underlying.row_count

    ## Gets the specified row.

       Arguments:
       - index: The position of the row within the table.
    at : Integer -> Any
    at self index =
        underlying = self.table
        Row.Value underlying index

View File

@ -2,8 +2,10 @@ package org.enso.interpreter.bench.benchmarks.semantic;
import java.io.ByteArrayOutputStream;
import java.nio.file.Paths;
import java.util.AbstractList;
import java.util.concurrent.TimeUnit;
import java.util.function.Function;
import java.util.function.Supplier;
import org.graalvm.polyglot.Context;
import org.graalvm.polyglot.Engine;
import org.graalvm.polyglot.Value;
@ -48,6 +50,7 @@ public class VectorBenchmarks {
.build();
var module = ctx.eval("enso", "\n" +
"import Standard.Base.Data.Vector\n" +
"from Standard.Base.Data.Array_Proxy import Array_Proxy\n" +
"\n" +
"avg arr =\n" +
" sum acc i = if i == arr.length then acc else\n" +
@ -68,37 +71,55 @@ public class VectorBenchmarks {
"to_vector arr = Vector.from_polyglot_array arr\n" +
"to_array vec = vec.to_array\n" +
"slice vec = vec.slice\n" +
"fill_proxy proxy vec = \n" +
" size v = vec.length\n" +
" at i = vec.at i\n" +
" proxy.init size at\n" +
"create_array_proxy vec =\n" +
" Array_Proxy.from_proxy_object vec\n" +
"\n");
this.self = module.invokeMember("get_associated_type");
Function<String,Value> getMethod = (name) -> module.invokeMember("get_method", self, name);
var length = 1000;
Value arr = getMethod.apply("fibarr").execute(self, length, Integer.MAX_VALUE);
Value vec = getMethod.apply("fibarr").execute(self, length, Integer.MAX_VALUE);
switch (params.getBenchmark().replaceFirst(".*\\.", "")) {
case "averageOverVector": {
this.arrayOfFibNumbers = arr;
this.arrayOfFibNumbers = vec;
break;
}
case "averageOverSlice": {
this.arrayOfFibNumbers = getMethod.apply("slice").execute(self, arr, 1, length);
this.arrayOfFibNumbers = getMethod.apply("slice").execute(self, vec, 1, length);
break;
}
case "averageOverArray": {
this.arrayOfFibNumbers = getMethod.apply("to_array").execute(self, arr);
this.arrayOfFibNumbers = getMethod.apply("to_array").execute(self, vec);
break;
}
case "averageOverPolyglotVector": {
long[] copy = copyToPolyglotArray(arr);
long[] copy = copyToPolyglotArray(vec);
this.arrayOfFibNumbers = getMethod.apply("to_vector").execute(self, copy);
break;
}
case "averageOverPolyglotArray": {
long[] copy = copyToPolyglotArray(arr);
long[] copy = copyToPolyglotArray(vec);
this.arrayOfFibNumbers = Value.asValue(copy);
break;
}
case "averageOverArrayProxy": {
this.arrayOfFibNumbers = getMethod.apply("create_array_proxy").execute(self, vec);
break;
}
case "averageAbstractList": {
long[] copy = copyToPolyglotArray(vec);
final ProxyList<Long> proxyList = new ProxyList<Long>();
getMethod.apply("fill_proxy").execute(self, proxyList, copy);
this.arrayOfFibNumbers = Value.asValue(proxyList);
break;
}
default:
throw new IllegalStateException("Unexpected benchmark: " + params.getBenchmark());
}
@ -138,6 +159,16 @@ public class VectorBenchmarks {
performBenchmark(matter);
}
/**
 * Benchmark entry for the "averageOverArrayProxy" case. For this case the setup stores the
 * result of the Enso {@code create_array_proxy} method as the benchmark input; the measured work
 * itself is the shared {@code performBenchmark} body.
 *
 * @param matter the JMH blackhole passed on to {@code performBenchmark}
 */
@Benchmark
public void averageOverArrayProxy(Blackhole matter) {
performBenchmark(matter);
}
/**
 * Benchmark entry for the "averageAbstractList" case. For this case the setup stores a
 * {@code ProxyList} whose callbacks were installed by the Enso {@code fill_proxy} method as the
 * benchmark input; the measured work itself is the shared {@code performBenchmark} body.
 *
 * @param matter the JMH blackhole passed on to {@code performBenchmark}
 */
@Benchmark
public void averageAbstractList(Blackhole matter) {
performBenchmark(matter);
}
private void performBenchmark(Blackhole matter) throws AssertionError {
var average = avg.execute(self, arrayOfFibNumbers);
if (!average.fitsInDouble()) {
@ -150,5 +181,25 @@ public class VectorBenchmarks {
}
matter.consume(result);
}
/**
 * A list whose size and elements are provided by two callbacks installed through
 * {@link #init(Function, Function)}.
 *
 * <p>Both callbacks are unset until {@code init} is called; the list must not be queried before
 * that.
 *
 * @param <T> the element type produced by the element callback
 */
public static final class ProxyList<T> extends AbstractList<T> {
  private Function<Object, Integer> sizeCallback;
  private Function<Integer, T> elementCallback;

  /**
   * Installs the callbacks backing this list.
   *
   * @param size callback reporting the number of elements; it is always invoked with {@code 0}
   * @param get callback returning the element at the given index
   */
  public void init(Function<Object, Integer> size, Function<Integer, T> get) {
    sizeCallback = size;
    elementCallback = get;
  }

  @Override
  public int size() {
    // The size callback takes one argument; a constant 0 is passed as a placeholder.
    final Integer reported = sizeCallback.apply(0);
    return reported;
  }

  @Override
  public T get(int i) {
    final Integer boxedIndex = i;
    return elementCallback.apply(boxedIndex);
  }
}
}

View File

@ -14,8 +14,4 @@ public class InvalidArrayIndexError extends UniquelyConstructibleBuiltin {
protected List<String> getConstructorParamNames() {
return List.of("array", "index");
}
public Atom wrap(Context c, Array.InvalidIndexException e) {
return newInstance(e.getArray(), e.getIndex());
}
}

View File

@ -8,7 +8,6 @@ import com.oracle.truffle.api.library.CachedLibrary;
import com.oracle.truffle.api.library.ExportLibrary;
import com.oracle.truffle.api.library.ExportMessage;
import org.enso.interpreter.dsl.Builtin;
import org.enso.interpreter.node.expression.builtin.error.InvalidArrayIndexError;
import org.enso.interpreter.runtime.Context;
import org.enso.interpreter.runtime.library.dispatch.TypesLibrary;
@ -20,24 +19,6 @@ import org.enso.interpreter.runtime.error.WithWarnings;
@ExportLibrary(TypesLibrary.class)
@Builtin(pkg = "mutable", stdlibName = "Standard.Base.Data.Array.Array")
public final class Array implements TruffleObject {
public static class InvalidIndexException extends RuntimeException {
private final long index;
private final Array array;
public InvalidIndexException(long index, Array array) {
this.index = index;
this.array = array;
}
public long getIndex() {
return index;
}
public Array getArray() {
return array;
}
}
private final Object[] items;
/**

View File

@ -0,0 +1,74 @@
package org.enso.interpreter.runtime.data;
import com.oracle.truffle.api.CompilerDirectives;
import com.oracle.truffle.api.interop.ArityException;
import com.oracle.truffle.api.interop.InteropLibrary;
import com.oracle.truffle.api.interop.InvalidArrayIndexException;
import com.oracle.truffle.api.interop.TruffleObject;
import com.oracle.truffle.api.interop.UnsupportedMessageException;
import com.oracle.truffle.api.interop.UnsupportedTypeException;
import com.oracle.truffle.api.library.CachedLibrary;
import com.oracle.truffle.api.library.ExportLibrary;
import com.oracle.truffle.api.library.ExportMessage;
import org.enso.interpreter.dsl.Builtin;
import org.enso.interpreter.runtime.Context;
import org.enso.interpreter.runtime.error.PanicException;
import org.enso.interpreter.runtime.library.dispatch.TypesLibrary;
/**
 * A wrapper that allows to turn an Enso callback providing elements into a polyglot Array.
 *
 * <p>This allows creation of arrays (and with them, vectors) using non-standard storage - for
 * example exposing rows of a Table without copying any data.
 *
 * <p>The proxy stores only the declared length and the callback; every element read executes the
 * callback with the requested index.
 */
@ExportLibrary(InteropLibrary.class)
@ExportLibrary(TypesLibrary.class)
@Builtin(pkg = "immutable", stdlibName = "Standard.Base.Data.Array_Proxy.Array_Proxy")
public final class ArrayProxy implements TruffleObject {
  private final long length;
  private final Object at;

  /**
   * Creates a proxy array.
   *
   * @param length the number of elements the array reports; must not be negative
   * @param at the callback executed with an index to obtain the element at that index
   * @throws PanicException with a type error if {@code at} is not executable (checked only when
   *     running in the interpreter)
   * @throws IllegalArgumentException if {@code length} is negative
   */
  @Builtin.Method(description = "Creates an array backed by a proxy object.")
  public ArrayProxy(long length, Object at) {
    if (CompilerDirectives.inInterpreter()) {
      InteropLibrary interop = InteropLibrary.getUncached();
      if (!interop.isExecutable(at)) {
        throw new PanicException(
            Context.get(interop).getBuiltins().error().makeTypeError("Function", at, "at"),
            interop);
      }
    }

    // A negative length would make getArraySize report a negative size, so reject it up front.
    if (length < 0) {
      CompilerDirectives.transferToInterpreter();
      throw new IllegalArgumentException("Array_Proxy length cannot be negative.");
    }

    this.length = length;
    this.at = at;
  }

  /** Marks this object as an array for interop purposes. */
  @ExportMessage
  public boolean hasArrayElements() {
    return true;
  }

  /** Returns the length given at construction. */
  @ExportMessage
  public long getArraySize() {
    return length;
  }

  /** An index is readable when it lies in {@code [0, length)}. */
  @ExportMessage
  boolean isArrayElementReadable(long index) {
    return index < length && index >= 0;
  }

  /**
   * Reads an element by executing the callback with the given index.
   *
   * @param index the index of the element to read
   * @param interop the interop library used to execute the callback
   * @return the value returned by the callback
   * @throws InvalidArrayIndexException if {@code index} is outside {@code [0, length)}
   * @throws UnsupportedMessageException if executing the callback fails with an interop error;
   *     the original exception is attached as the cause
   */
  @ExportMessage
  public Object readArrayElement(long index, @CachedLibrary(limit = "3") InteropLibrary interop)
      throws UnsupportedMessageException, InvalidArrayIndexException {
    if (index >= length || index < 0) {
      throw InvalidArrayIndexException.create(index);
    }

    try {
      return interop.execute(at, index);
    } catch (UnsupportedTypeException | ArityException | UnsupportedMessageException e) {
      throw UnsupportedMessageException.create(e);
    }
  }
}

View File

@ -38,6 +38,7 @@ import org.enso.polyglot.data.TypeGraph;
UnresolvedConversion.class,
UnresolvedSymbol.class,
Array.class,
ArrayProxy.class,
ArrayOverBuffer.class,
EnsoBigInteger.class,
ManagedResource.class,
@ -133,7 +134,7 @@ public class Types {
return Constants.UNRESOLVED_SYMBOL;
} else if (TypesGen.isManagedResource(value)) {
return ConstantsGen.MANAGED_RESOURCE;
} else if (TypesGen.isArray(value) || TypesGen.isArrayOverBuffer(value)) {
} else if (TypesGen.isArray(value) || TypesGen.isArrayOverBuffer(value) || TypesGen.isArrayProxy(value)) {
return ConstantsGen.ARRAY;
} else if (TypesGen.isVector(value)) {
return ConstantsGen.VECTOR;

View File

@ -1442,3 +1442,37 @@ spec prefix table_builder test_selection pending=Nothing =
no_cols.select_columns Column_Selector.Blank_Columns . columns . map .name . should_equal []
no_rows.select_columns Column_Selector.Blank_Columns . columns . map .name . should_equal ["X"]
no_rows.remove_columns Column_Selector.Blank_Columns . columns . map .name . should_equal []
Test.group prefix+"Table.rows" pending=pending <|
table = table_builder [["X", [1, 2, 3, 4]], ["Y", [5, 6, 7, 8]], ["Z", ["A", "B", "C", "D"]]]
Test.specify "should allow to get a Vector of Table rows" <|
rows = table.rows
rows.length . should_equal 4
first_row = rows.first
first_row . length . should_equal 3
first_row.at "X" . should_equal 1
first_row.at "Y" . should_equal 5
first_row.at "Z" . should_equal "A"
last_row = rows.at -1
last_row . length . should_equal 3
last_row.at 0 . should_equal 4
last_row.at 1 . should_equal 8
last_row.at 2 . should_equal "D"
last_row.at -1 . should_equal "D"
rows.map .to_vector . should_equal [[1, 5, "A"], [2, 6, "B"], [3, 7, "C"], [4, 8, "D"]]
Test.specify "should fetch rows up to the specified limit" <|
table.rows max_rows=2 . map .to_vector . should_equal [[1, 5, "A"], [2, 6, "B"]]
Test.specify "should correctly handle errors" <|
table.rows.at 5 . should_fail_with Index_Out_Of_Bounds_Error_Data
err = table.rows.at -6
err.should_fail_with Index_Out_Of_Bounds_Error_Data
err.catch . should_equal (Index_Out_Of_Bounds_Error_Data -6 4)
table.rows (max_rows=2) . at 2 . should_fail_with Index_Out_Of_Bounds_Error_Data
table.rows . at 0 . at -4 . should_fail_with Index_Out_Of_Bounds_Error_Data
table.rows . at 0 . at "unknown" . should_fail_with No_Such_Column_Error_Data

View File

@ -0,0 +1,43 @@
from Standard.Base import all
from Standard.Base.Data.Array_Proxy import Array_Proxy
from Standard.Test import Test, Test_Suite
## A test helper providing `length` and `at`, so that it can be passed to
   `Array_Proxy.from_proxy_object`. The element at index `ix` is `ix * 10`.
type Proxy_Object
    Value length

    at : Integer -> Integer
    at self ix =
        factor = 10
        ix * factor
## Specification of `Array_Proxy`: delegation to the callback, use as the
   backing storage of a `Vector`, construction from a proxy object and
   validation of the callback at construction.
spec =
    Test.group "Array_Proxy" <|
        Test.specify "should correctly delegate to the callback" <|
            proxy = Array_Proxy.new 3 (i -> i + 10)
            proxy.length . should_equal 3
            (proxy.at 0).should_equal 10
            (proxy.at 1).should_equal 11
            (proxy.at 2).should_equal 12
            # Index 3 is one past the declared length.
            (proxy.at 3).should_fail_with Index_Out_Of_Bounds_Error_Data

        Test.specify "should be able to be used to construct a Vector" <|
            shifted = Vector.from_polyglot_array (Array_Proxy.new 3 (i -> i + 10))
            shifted.length . should_equal 3
            shifted.should_equal [10, 11, 12]
            incremented = shifted.map (x -> x + 1)
            incremented.should_equal [11, 12, 13]

            # The callback of the second proxy reads from the first vector.
            descending = Vector.from_polyglot_array (Array_Proxy.new 3 (i -> (shifted.at 2) - i))
            descending.should_equal [12, 11, 10]
            descending.sort . should_equal [10, 11, 12]

        Test.specify "should be able to construct a Vector from a proxy object" <|
            from_vector = Vector.from_polyglot_array (Array_Proxy.from_proxy_object [4, 3, 2])
            from_vector.should_equal [4, 3, 2]

            custom = Proxy_Object.Value 5
            from_custom = Vector.from_polyglot_array (Array_Proxy.from_proxy_object custom)
            from_custom.should_equal [0, 10, 20, 30, 40]

        Test.specify "should check the callback type validity at construction" <|
            Test.expect_panic_with (Array_Proxy.new 0 0) Type_Error_Data
## Entry point that runs `spec` through `Test_Suite.run_main`.
main = Test_Suite.run_main spec

View File

@ -19,8 +19,9 @@ import project.Semantic.Js_Interop_Spec
import project.Semantic.Python_Interop_Spec
import project.Semantic.R_Interop_Spec
import project.Data.Array_Polyglot_Spec
import project.Data.Array_Spec
import project.Data.Array_Polyglot_Spec
import project.Data.Array_Proxy_Spec
import project.Data.Bool_Spec
import project.Data.Function_Spec
import project.Data.Interval_Spec
@ -76,8 +77,9 @@ import project.Random_Spec
main = Test_Suite.run_main <|
Any_Spec.spec
Array_Polyglot_Spec.spec
Array_Spec.spec
Array_Proxy_Spec.spec
Array_Polyglot_Spec.spec
Bool_Spec.spec
Function_Spec.spec
Case_Spec.spec