Remove countMissing from Storage and replace with a new CountNothing operation. (#9137)

Removing another small piece of logic from the storages to it's own operation.
This commit is contained in:
James Dunkerley 2024-02-22 19:32:46 +00:00 committed by GitHub
parent 5e4a7cf01c
commit 0e2a91cfe1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
15 changed files with 101 additions and 91 deletions

View File

@ -33,6 +33,7 @@ from project.Internal.Column_Format import all
from project.Internal.Java_Exports import make_date_builder_adapter, make_string_builder
polyglot java import org.enso.base.Time_Utils
polyglot java import org.enso.table.data.column.operation.CountNothing
polyglot java import org.enso.table.data.column.operation.UnaryOperation
polyglot java import org.enso.table.data.column.operation.cast.CastProblemAggregator
polyglot java import org.enso.table.data.column.operation.unary.DatePartOperation
@ -2085,7 +2086,7 @@ type Column
example_count_nothing = Examples.text_column_2.count_nothing
count_nothing : Integer
count_nothing self = self.java_column.getStorage.countMissing
count_nothing self = CountNothing.apply self.java_column
## GROUP Standard.Base.Metadata
Returns the number of non-null items in this column.

View File

@ -0,0 +1,64 @@
package org.enso.table.data.column.operation;
import org.enso.table.data.column.storage.ColumnStorage;
import org.enso.table.data.column.storage.ColumnStorageWithNothingMap;
import org.enso.table.data.table.Column;
import org.graalvm.polyglot.Context;
/** An operation for counting the number of Nothing values in a Column. */
public class CountNothing {
/** Counts the number of Nothing values in the given column. */
public static long apply(Column column) {
ColumnStorage storage = column.getStorage();
return applyToStorage(storage);
}
/** Counts the number of Nothing values in the given storage. */
public static long applyToStorage(ColumnStorage storage) {
if (storage instanceof ColumnStorageWithNothingMap withNothingMap) {
return withNothingMap.getIsNothingMap().cardinality();
}
Context context = Context.getCurrent();
long count = 0;
for (long i = 0; i < storage.getSize(); i++) {
if (storage.isNothing(i)) {
count += 1;
}
context.safepoint();
}
return count;
}
/** Returns true if any value in the storage is Nothing. */
public static boolean anyNothing(ColumnStorage storage) {
if (storage instanceof ColumnStorageWithNothingMap withNothingMap) {
return !withNothingMap.getIsNothingMap().isEmpty();
}
Context context = Context.getCurrent();
for (long i = 0; i < storage.getSize(); i++) {
if (storage.isNothing(i)) {
return true;
}
context.safepoint();
}
return false;
}
/** Returns true if all values in the storage are Nothing. */
public static boolean allNothing(ColumnStorage storage) {
if (storage instanceof ColumnStorageWithNothingMap withNothingMap) {
return withNothingMap.getIsNothingMap().nextClearBit(0) >= storage.getSize();
}
Context context = Context.getCurrent();
for (long i = 0; i < storage.getSize(); i++) {
if (!storage.isNothing(i)) {
return false;
}
context.safepoint();
}
return true;
}
}

View File

@ -51,14 +51,6 @@ public final class BoolStorage extends Storage<Boolean>
return size;
}
/**
* @inheritDoc
*/
@Override
public int countMissing() {
return isMissing.cardinality();
}
@Override
public StorageType getType() {
return BooleanType.INSTANCE;

View File

@ -26,11 +26,6 @@ public class MixedStorageFacade extends Storage<Object> {
return underlyingStorage.size();
}
@Override
public int countMissing() {
return underlyingStorage.countMissing();
}
@Override
public StorageType getType() {
return AnyObjectType.INSTANCE;

View File

@ -3,6 +3,7 @@ package org.enso.table.data.column.storage;
import java.util.AbstractList;
import java.util.BitSet;
import java.util.List;
import org.enso.table.data.column.operation.CountNothing;
import org.enso.table.data.column.operation.map.MapOperationProblemAggregator;
import org.enso.table.data.column.operation.map.MapOperationStorage;
import org.enso.table.data.column.storage.type.StorageType;
@ -42,23 +43,6 @@ public abstract class SpecializedStorage<T> extends Storage<T> {
return size;
}
/**
* @inheritDoc
*/
@Override
public int countMissing() {
Context context = Context.getCurrent();
int count = 0;
for (int i = 0; i < size; i++) {
if (data[i] == null) {
count += 1;
}
context.safepoint();
}
return count;
}
/**
* @param idx an index
* @return the data item contained at the given index.
@ -161,7 +145,7 @@ public abstract class SpecializedStorage<T> extends Storage<T> {
@Override
public Storage<T> fillMissingFromPrevious(BoolStorage missingIndicator) {
if (missingIndicator != null && missingIndicator.countMissing() > 0) {
if (missingIndicator != null && CountNothing.anyNothing(missingIndicator)) {
throw new IllegalArgumentException(
"Missing indicator must not contain missing values itself.");
}

View File

@ -28,11 +28,6 @@ public abstract class Storage<T> implements ColumnStorage {
*/
public abstract int size();
/**
* @return the number of NA elements in this column
*/
public abstract int countMissing();
/**
* @return the type tag of this column's storage.
*/

View File

@ -16,16 +16,13 @@ import org.enso.table.data.column.operation.map.numeric.comparisons.GreaterOrEqu
import org.enso.table.data.column.operation.map.numeric.comparisons.LessComparison;
import org.enso.table.data.column.operation.map.numeric.comparisons.LessOrEqualComparison;
import org.enso.table.data.column.operation.map.numeric.isin.LongIsInOp;
import org.enso.table.data.column.storage.BoolStorage;
import org.enso.table.data.column.storage.ColumnLongStorage;
import org.enso.table.data.column.storage.Storage;
import org.enso.table.data.column.storage.ValueIsNothingException;
import org.enso.table.data.column.storage.*;
import org.enso.table.data.column.storage.type.IntegerType;
import org.enso.table.data.column.storage.type.StorageType;
import org.graalvm.polyglot.Context;
public abstract class AbstractLongStorage extends NumericStorage<Long>
implements ColumnLongStorage {
implements ColumnLongStorage, ColumnStorageWithNothingMap {
public abstract long getItem(int idx);
public abstract BitSet getIsMissing();
@ -173,4 +170,9 @@ public abstract class AbstractLongStorage extends NumericStorage<Long>
}
return getItem((int) index);
}
@Override
public BitSet getIsNothingMap() {
return getIsMissing();
}
}

View File

@ -27,11 +27,6 @@ public abstract class ComputedLongStorage extends AbstractLongStorage {
return size;
}
@Override
public int countMissing() {
return 0;
}
@Override
public IntegerType getType() {
return IntegerType.INT_64;

View File

@ -17,6 +17,8 @@ import org.graalvm.polyglot.Context;
public abstract class ComputedNullableLongStorage extends AbstractLongStorage {
protected final int size;
protected BitSet isMissing = null;
protected abstract Long computeItem(int idx);
protected ComputedNullableLongStorage(int size) {
@ -28,11 +30,6 @@ public abstract class ComputedNullableLongStorage extends AbstractLongStorage {
return size;
}
@Override
public int countMissing() {
return 0;
}
@Override
public IntegerType getType() {
return IntegerType.INT_64;
@ -64,16 +61,20 @@ public abstract class ComputedNullableLongStorage extends AbstractLongStorage {
@Override
public BitSet getIsMissing() {
BitSet missing = new BitSet();
Context context = Context.getCurrent();
for (int i = 0; i < size; i++) {
if (computeItem(i) == null) {
missing.set(i);
}
if (isMissing == null) {
// Only compute once as needed.
BitSet missing = new BitSet();
Context context = Context.getCurrent();
for (int i = 0; i < size; i++) {
if (computeItem(i) == null) {
missing.set(i);
}
context.safepoint();
context.safepoint();
}
isMissing = missing;
}
return missing;
return isMissing;
}
@Override

View File

@ -67,14 +67,6 @@ public final class DoubleStorage extends NumericStorage<Double>
return size;
}
/**
* @inheritDoc
*/
@Override
public int countMissing() {
return isMissing.cardinality();
}
/**
* @param idx an index
* @return the data item contained at the given index.

View File

@ -6,7 +6,6 @@ import java.util.List;
import org.enso.base.polyglot.NumericConverter;
import org.enso.table.data.column.builder.BigIntegerBuilder;
import org.enso.table.data.column.builder.NumericBuilder;
import org.enso.table.data.column.storage.ColumnStorageWithNothingMap;
import org.enso.table.data.column.storage.Storage;
import org.enso.table.data.column.storage.type.IntegerType;
import org.enso.table.data.column.storage.type.StorageType;
@ -18,7 +17,7 @@ import org.graalvm.polyglot.Context;
import org.graalvm.polyglot.Value;
/** A column storing 64-bit integers. */
public final class LongStorage extends AbstractLongStorage implements ColumnStorageWithNothingMap {
public final class LongStorage extends AbstractLongStorage {
// TODO [RW] at some point we will want to add separate storage classes for byte, short and int,
// for more compact storage and more efficient handling of smaller integers; for now we will be
// handling this just by checking the bounds
@ -64,14 +63,6 @@ public final class LongStorage extends AbstractLongStorage implements ColumnStor
return size;
}
/**
* @inheritDoc
*/
@Override
public int countMissing() {
return isMissing.cardinality();
}
/**
* @param idx an index
* @return the data item contained at the given index.
@ -260,9 +251,4 @@ public final class LongStorage extends AbstractLongStorage implements ColumnStor
assert widerType.fits(type);
return new LongStorage(data, size, isMissing, widerType);
}
@Override
public BitSet getIsNothingMap() {
return isMissing;
}
}

View File

@ -14,6 +14,7 @@ import java.util.stream.IntStream;
import org.enso.base.text.TextFoldingStrategy;
import org.enso.table.aggregations.Aggregator;
import org.enso.table.data.column.builder.Builder;
import org.enso.table.data.column.operation.CountNothing;
import org.enso.table.data.column.storage.Storage;
import org.enso.table.data.table.Column;
import org.enso.table.data.table.Table;
@ -166,7 +167,7 @@ public class MultiValueIndex<KeyType extends MultiValueKeyBase> {
*/
public KeyType findAnyNullKey() {
for (Column c : keyColumns) {
boolean containsNulls = c.getStorage().countMissing() > 0;
boolean containsNulls = CountNothing.anyNothing(c.getStorage());
if (containsNulls) {
for (KeyType key : locs.keySet()) {
if (key.hasAnyNulls()) {

View File

@ -1,6 +1,7 @@
package org.enso.table.parsing;
import org.enso.table.data.column.builder.Builder;
import org.enso.table.data.column.operation.CountNothing;
import org.enso.table.data.column.storage.Storage;
import org.enso.table.parsing.problems.CommonParseProblemAggregator;
import org.enso.table.parsing.problems.ParseProblemAggregator;
@ -43,8 +44,7 @@ public class TypeInferringParser extends DatatypeParser {
Storage<String> sourceStorage, CommonParseProblemAggregator problemAggregator) {
// If there are no values, the Auto parser would guess some random type (the first one that is
// checked). Instead, we just return the empty column unchanged.
boolean hasNoValues =
(sourceStorage.size() == 0) || (sourceStorage.countMissing() == sourceStorage.size());
boolean hasNoValues = (sourceStorage.size() == 0) || CountNothing.allNothing(sourceStorage);
if (hasNoValues) {
return fallbackParser.parseColumn(sourceStorage, problemAggregator);
}

View File

@ -34,11 +34,6 @@ public class ExplodingStorage extends Storage<Long> {
return array.length;
}
@Override
public int countMissing() {
return 0;
}
@Override
public StorageType getType() {
return IntegerType.INT_64;

View File

@ -630,6 +630,13 @@ add_specs suite_builder =
c1 = Column.from_vector "A" ["1", "2", "3"]
c1.parse type=Nothing . should_fail_with Illegal_Argument
group_builder.specify "should return unchanged if all are Nothing or no rows" <|
c1 = Column.from_vector "A" [Nothing, Nothing, Nothing] Value_Type.Char
c1.parse.value_type . should_equal Value_Type.Char
c2 = Column.from_vector "A" [] Value_Type.Char
c2.parse.value_type . should_equal Value_Type.Char
group_builder.specify "should error if the input column is not text" <|
c1 = Column.from_vector "A" [1, 2, 3]
r1 = c1.parse