mirror of
https://github.com/enso-org/enso.git
synced 2024-11-23 08:08:34 +03:00
Remove countMissing
from Storage and replace with a new CountNothing
operation. (#9137)
Removing another small piece of logic from the storages to it's own operation.
This commit is contained in:
parent
5e4a7cf01c
commit
0e2a91cfe1
@ -33,6 +33,7 @@ from project.Internal.Column_Format import all
|
||||
from project.Internal.Java_Exports import make_date_builder_adapter, make_string_builder
|
||||
|
||||
polyglot java import org.enso.base.Time_Utils
|
||||
polyglot java import org.enso.table.data.column.operation.CountNothing
|
||||
polyglot java import org.enso.table.data.column.operation.UnaryOperation
|
||||
polyglot java import org.enso.table.data.column.operation.cast.CastProblemAggregator
|
||||
polyglot java import org.enso.table.data.column.operation.unary.DatePartOperation
|
||||
@ -2085,7 +2086,7 @@ type Column
|
||||
|
||||
example_count_nothing = Examples.text_column_2.count_nothing
|
||||
count_nothing : Integer
|
||||
count_nothing self = self.java_column.getStorage.countMissing
|
||||
count_nothing self = CountNothing.apply self.java_column
|
||||
|
||||
## GROUP Standard.Base.Metadata
|
||||
Returns the number of non-null items in this column.
|
||||
|
@ -0,0 +1,64 @@
|
||||
package org.enso.table.data.column.operation;
|
||||
|
||||
import org.enso.table.data.column.storage.ColumnStorage;
|
||||
import org.enso.table.data.column.storage.ColumnStorageWithNothingMap;
|
||||
import org.enso.table.data.table.Column;
|
||||
import org.graalvm.polyglot.Context;
|
||||
|
||||
/** An operation for counting the number of Nothing values in a Column. */
|
||||
public class CountNothing {
|
||||
/** Counts the number of Nothing values in the given column. */
|
||||
public static long apply(Column column) {
|
||||
ColumnStorage storage = column.getStorage();
|
||||
return applyToStorage(storage);
|
||||
}
|
||||
|
||||
/** Counts the number of Nothing values in the given storage. */
|
||||
public static long applyToStorage(ColumnStorage storage) {
|
||||
if (storage instanceof ColumnStorageWithNothingMap withNothingMap) {
|
||||
return withNothingMap.getIsNothingMap().cardinality();
|
||||
}
|
||||
|
||||
Context context = Context.getCurrent();
|
||||
long count = 0;
|
||||
for (long i = 0; i < storage.getSize(); i++) {
|
||||
if (storage.isNothing(i)) {
|
||||
count += 1;
|
||||
}
|
||||
context.safepoint();
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
/** Returns true if any value in the storage is Nothing. */
|
||||
public static boolean anyNothing(ColumnStorage storage) {
|
||||
if (storage instanceof ColumnStorageWithNothingMap withNothingMap) {
|
||||
return !withNothingMap.getIsNothingMap().isEmpty();
|
||||
}
|
||||
|
||||
Context context = Context.getCurrent();
|
||||
for (long i = 0; i < storage.getSize(); i++) {
|
||||
if (storage.isNothing(i)) {
|
||||
return true;
|
||||
}
|
||||
context.safepoint();
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/** Returns true if all values in the storage are Nothing. */
|
||||
public static boolean allNothing(ColumnStorage storage) {
|
||||
if (storage instanceof ColumnStorageWithNothingMap withNothingMap) {
|
||||
return withNothingMap.getIsNothingMap().nextClearBit(0) >= storage.getSize();
|
||||
}
|
||||
|
||||
Context context = Context.getCurrent();
|
||||
for (long i = 0; i < storage.getSize(); i++) {
|
||||
if (!storage.isNothing(i)) {
|
||||
return false;
|
||||
}
|
||||
context.safepoint();
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
@ -51,14 +51,6 @@ public final class BoolStorage extends Storage<Boolean>
|
||||
return size;
|
||||
}
|
||||
|
||||
/**
|
||||
* @inheritDoc
|
||||
*/
|
||||
@Override
|
||||
public int countMissing() {
|
||||
return isMissing.cardinality();
|
||||
}
|
||||
|
||||
@Override
|
||||
public StorageType getType() {
|
||||
return BooleanType.INSTANCE;
|
||||
|
@ -26,11 +26,6 @@ public class MixedStorageFacade extends Storage<Object> {
|
||||
return underlyingStorage.size();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int countMissing() {
|
||||
return underlyingStorage.countMissing();
|
||||
}
|
||||
|
||||
@Override
|
||||
public StorageType getType() {
|
||||
return AnyObjectType.INSTANCE;
|
||||
|
@ -3,6 +3,7 @@ package org.enso.table.data.column.storage;
|
||||
import java.util.AbstractList;
|
||||
import java.util.BitSet;
|
||||
import java.util.List;
|
||||
import org.enso.table.data.column.operation.CountNothing;
|
||||
import org.enso.table.data.column.operation.map.MapOperationProblemAggregator;
|
||||
import org.enso.table.data.column.operation.map.MapOperationStorage;
|
||||
import org.enso.table.data.column.storage.type.StorageType;
|
||||
@ -42,23 +43,6 @@ public abstract class SpecializedStorage<T> extends Storage<T> {
|
||||
return size;
|
||||
}
|
||||
|
||||
/**
|
||||
* @inheritDoc
|
||||
*/
|
||||
@Override
|
||||
public int countMissing() {
|
||||
Context context = Context.getCurrent();
|
||||
int count = 0;
|
||||
for (int i = 0; i < size; i++) {
|
||||
if (data[i] == null) {
|
||||
count += 1;
|
||||
}
|
||||
|
||||
context.safepoint();
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param idx an index
|
||||
* @return the data item contained at the given index.
|
||||
@ -161,7 +145,7 @@ public abstract class SpecializedStorage<T> extends Storage<T> {
|
||||
|
||||
@Override
|
||||
public Storage<T> fillMissingFromPrevious(BoolStorage missingIndicator) {
|
||||
if (missingIndicator != null && missingIndicator.countMissing() > 0) {
|
||||
if (missingIndicator != null && CountNothing.anyNothing(missingIndicator)) {
|
||||
throw new IllegalArgumentException(
|
||||
"Missing indicator must not contain missing values itself.");
|
||||
}
|
||||
|
@ -28,11 +28,6 @@ public abstract class Storage<T> implements ColumnStorage {
|
||||
*/
|
||||
public abstract int size();
|
||||
|
||||
/**
|
||||
* @return the number of NA elements in this column
|
||||
*/
|
||||
public abstract int countMissing();
|
||||
|
||||
/**
|
||||
* @return the type tag of this column's storage.
|
||||
*/
|
||||
|
@ -16,16 +16,13 @@ import org.enso.table.data.column.operation.map.numeric.comparisons.GreaterOrEqu
|
||||
import org.enso.table.data.column.operation.map.numeric.comparisons.LessComparison;
|
||||
import org.enso.table.data.column.operation.map.numeric.comparisons.LessOrEqualComparison;
|
||||
import org.enso.table.data.column.operation.map.numeric.isin.LongIsInOp;
|
||||
import org.enso.table.data.column.storage.BoolStorage;
|
||||
import org.enso.table.data.column.storage.ColumnLongStorage;
|
||||
import org.enso.table.data.column.storage.Storage;
|
||||
import org.enso.table.data.column.storage.ValueIsNothingException;
|
||||
import org.enso.table.data.column.storage.*;
|
||||
import org.enso.table.data.column.storage.type.IntegerType;
|
||||
import org.enso.table.data.column.storage.type.StorageType;
|
||||
import org.graalvm.polyglot.Context;
|
||||
|
||||
public abstract class AbstractLongStorage extends NumericStorage<Long>
|
||||
implements ColumnLongStorage {
|
||||
implements ColumnLongStorage, ColumnStorageWithNothingMap {
|
||||
public abstract long getItem(int idx);
|
||||
|
||||
public abstract BitSet getIsMissing();
|
||||
@ -173,4 +170,9 @@ public abstract class AbstractLongStorage extends NumericStorage<Long>
|
||||
}
|
||||
return getItem((int) index);
|
||||
}
|
||||
|
||||
@Override
|
||||
public BitSet getIsNothingMap() {
|
||||
return getIsMissing();
|
||||
}
|
||||
}
|
||||
|
@ -27,11 +27,6 @@ public abstract class ComputedLongStorage extends AbstractLongStorage {
|
||||
return size;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int countMissing() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public IntegerType getType() {
|
||||
return IntegerType.INT_64;
|
||||
|
@ -17,6 +17,8 @@ import org.graalvm.polyglot.Context;
|
||||
public abstract class ComputedNullableLongStorage extends AbstractLongStorage {
|
||||
protected final int size;
|
||||
|
||||
protected BitSet isMissing = null;
|
||||
|
||||
protected abstract Long computeItem(int idx);
|
||||
|
||||
protected ComputedNullableLongStorage(int size) {
|
||||
@ -28,11 +30,6 @@ public abstract class ComputedNullableLongStorage extends AbstractLongStorage {
|
||||
return size;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int countMissing() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public IntegerType getType() {
|
||||
return IntegerType.INT_64;
|
||||
@ -64,16 +61,20 @@ public abstract class ComputedNullableLongStorage extends AbstractLongStorage {
|
||||
|
||||
@Override
|
||||
public BitSet getIsMissing() {
|
||||
BitSet missing = new BitSet();
|
||||
Context context = Context.getCurrent();
|
||||
for (int i = 0; i < size; i++) {
|
||||
if (computeItem(i) == null) {
|
||||
missing.set(i);
|
||||
}
|
||||
if (isMissing == null) {
|
||||
// Only compute once as needed.
|
||||
BitSet missing = new BitSet();
|
||||
Context context = Context.getCurrent();
|
||||
for (int i = 0; i < size; i++) {
|
||||
if (computeItem(i) == null) {
|
||||
missing.set(i);
|
||||
}
|
||||
|
||||
context.safepoint();
|
||||
context.safepoint();
|
||||
}
|
||||
isMissing = missing;
|
||||
}
|
||||
return missing;
|
||||
return isMissing;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -67,14 +67,6 @@ public final class DoubleStorage extends NumericStorage<Double>
|
||||
return size;
|
||||
}
|
||||
|
||||
/**
|
||||
* @inheritDoc
|
||||
*/
|
||||
@Override
|
||||
public int countMissing() {
|
||||
return isMissing.cardinality();
|
||||
}
|
||||
|
||||
/**
|
||||
* @param idx an index
|
||||
* @return the data item contained at the given index.
|
||||
|
@ -6,7 +6,6 @@ import java.util.List;
|
||||
import org.enso.base.polyglot.NumericConverter;
|
||||
import org.enso.table.data.column.builder.BigIntegerBuilder;
|
||||
import org.enso.table.data.column.builder.NumericBuilder;
|
||||
import org.enso.table.data.column.storage.ColumnStorageWithNothingMap;
|
||||
import org.enso.table.data.column.storage.Storage;
|
||||
import org.enso.table.data.column.storage.type.IntegerType;
|
||||
import org.enso.table.data.column.storage.type.StorageType;
|
||||
@ -18,7 +17,7 @@ import org.graalvm.polyglot.Context;
|
||||
import org.graalvm.polyglot.Value;
|
||||
|
||||
/** A column storing 64-bit integers. */
|
||||
public final class LongStorage extends AbstractLongStorage implements ColumnStorageWithNothingMap {
|
||||
public final class LongStorage extends AbstractLongStorage {
|
||||
// TODO [RW] at some point we will want to add separate storage classes for byte, short and int,
|
||||
// for more compact storage and more efficient handling of smaller integers; for now we will be
|
||||
// handling this just by checking the bounds
|
||||
@ -64,14 +63,6 @@ public final class LongStorage extends AbstractLongStorage implements ColumnStor
|
||||
return size;
|
||||
}
|
||||
|
||||
/**
|
||||
* @inheritDoc
|
||||
*/
|
||||
@Override
|
||||
public int countMissing() {
|
||||
return isMissing.cardinality();
|
||||
}
|
||||
|
||||
/**
|
||||
* @param idx an index
|
||||
* @return the data item contained at the given index.
|
||||
@ -260,9 +251,4 @@ public final class LongStorage extends AbstractLongStorage implements ColumnStor
|
||||
assert widerType.fits(type);
|
||||
return new LongStorage(data, size, isMissing, widerType);
|
||||
}
|
||||
|
||||
@Override
|
||||
public BitSet getIsNothingMap() {
|
||||
return isMissing;
|
||||
}
|
||||
}
|
||||
|
@ -14,6 +14,7 @@ import java.util.stream.IntStream;
|
||||
import org.enso.base.text.TextFoldingStrategy;
|
||||
import org.enso.table.aggregations.Aggregator;
|
||||
import org.enso.table.data.column.builder.Builder;
|
||||
import org.enso.table.data.column.operation.CountNothing;
|
||||
import org.enso.table.data.column.storage.Storage;
|
||||
import org.enso.table.data.table.Column;
|
||||
import org.enso.table.data.table.Table;
|
||||
@ -166,7 +167,7 @@ public class MultiValueIndex<KeyType extends MultiValueKeyBase> {
|
||||
*/
|
||||
public KeyType findAnyNullKey() {
|
||||
for (Column c : keyColumns) {
|
||||
boolean containsNulls = c.getStorage().countMissing() > 0;
|
||||
boolean containsNulls = CountNothing.anyNothing(c.getStorage());
|
||||
if (containsNulls) {
|
||||
for (KeyType key : locs.keySet()) {
|
||||
if (key.hasAnyNulls()) {
|
||||
|
@ -1,6 +1,7 @@
|
||||
package org.enso.table.parsing;
|
||||
|
||||
import org.enso.table.data.column.builder.Builder;
|
||||
import org.enso.table.data.column.operation.CountNothing;
|
||||
import org.enso.table.data.column.storage.Storage;
|
||||
import org.enso.table.parsing.problems.CommonParseProblemAggregator;
|
||||
import org.enso.table.parsing.problems.ParseProblemAggregator;
|
||||
@ -43,8 +44,7 @@ public class TypeInferringParser extends DatatypeParser {
|
||||
Storage<String> sourceStorage, CommonParseProblemAggregator problemAggregator) {
|
||||
// If there are no values, the Auto parser would guess some random type (the first one that is
|
||||
// checked). Instead, we just return the empty column unchanged.
|
||||
boolean hasNoValues =
|
||||
(sourceStorage.size() == 0) || (sourceStorage.countMissing() == sourceStorage.size());
|
||||
boolean hasNoValues = (sourceStorage.size() == 0) || CountNothing.allNothing(sourceStorage);
|
||||
if (hasNoValues) {
|
||||
return fallbackParser.parseColumn(sourceStorage, problemAggregator);
|
||||
}
|
||||
|
@ -34,11 +34,6 @@ public class ExplodingStorage extends Storage<Long> {
|
||||
return array.length;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int countMissing() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public StorageType getType() {
|
||||
return IntegerType.INT_64;
|
||||
|
@ -630,6 +630,13 @@ add_specs suite_builder =
|
||||
c1 = Column.from_vector "A" ["1", "2", "3"]
|
||||
c1.parse type=Nothing . should_fail_with Illegal_Argument
|
||||
|
||||
group_builder.specify "should return unchanged if all are Nothing or no rows" <|
|
||||
c1 = Column.from_vector "A" [Nothing, Nothing, Nothing] Value_Type.Char
|
||||
c1.parse.value_type . should_equal Value_Type.Char
|
||||
|
||||
c2 = Column.from_vector "A" [] Value_Type.Char
|
||||
c2.parse.value_type . should_equal Value_Type.Char
|
||||
|
||||
group_builder.specify "should error if the input column is not text" <|
|
||||
c1 = Column.from_vector "A" [1, 2, 3]
|
||||
r1 = c1.parse
|
||||
|
Loading…
Reference in New Issue
Block a user