This commit is contained in:
James Dunkerley 2024-11-21 11:01:07 +00:00
parent 57b255ebc3
commit c564b43ee7
2 changed files with 23 additions and 1 deletions

View File

@ -5,8 +5,12 @@ import org.enso.table.data.column.storage.ColumnStorage;
import org.enso.table.data.column.storage.StringStorage; import org.enso.table.data.column.storage.StringStorage;
import org.enso.table.data.table.Column; import org.enso.table.data.table.Column;
import org.graalvm.polyglot.Context; import org.graalvm.polyglot.Context;
import org.slf4j.LoggerFactory;
import org.slf4j.Logger;
public class CountUntrimmed { public class CountUntrimmed {
private static final Logger LOGGER = LoggerFactory.getLogger(CountUntrimmed.class);
/** Counts the number of cells in the columns with leading or trailing whitespace. */ /** Counts the number of cells in the columns with leading or trailing whitespace. */
public static long apply(Column column) { public static long apply(Column column) {
ColumnStorage storage = column.getStorage(); ColumnStorage storage = column.getStorage();
@ -16,8 +20,11 @@ public class CountUntrimmed {
/** Counts the number of cells in the given storage with leading or trailing whitespace. */ /** Counts the number of cells in the given storage with leading or trailing whitespace. */
public static long applyToStorage(ColumnStorage storage) { public static long applyToStorage(ColumnStorage storage) {
if (storage instanceof StringStorage stringStorage) { if (storage instanceof StringStorage stringStorage) {
LOGGER.warn("Using memoized implementation for StringStorage");
return stringStorage.countLeadingTrailingWhitespace(); return stringStorage.countLeadingTrailingWhitespace();
} }
LOGGER.warn("Using fall back implementation for ColumnStorage");
return compute(storage); return compute(storage);
} }
@ -29,7 +36,7 @@ public class CountUntrimmed {
var val = storage.getItemAsObject(i); var val = storage.getItemAsObject(i);
if (val instanceof String str) { if (val instanceof String str) {
if (Text_Utils.has_leading_trailing_whitespace(str)) { if (Text_Utils.has_leading_trailing_whitespace(str)) {
count++; count += 100;
} }
} }
context.safepoint(); context.safepoint();

View File

@ -1,6 +1,12 @@
package org.enso.table.data.column.storage; package org.enso.table.data.column.storage;
import java.util.BitSet; import java.util.BitSet;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.Executor;
import java.util.concurrent.Executors;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.enso.base.CompareException; import org.enso.base.CompareException;
import org.enso.base.Text_Utils; import org.enso.base.Text_Utils;
import org.enso.table.data.column.operation.CountUntrimmed; import org.enso.table.data.column.operation.CountUntrimmed;
@ -16,9 +22,12 @@ import org.enso.table.data.column.operation.map.text.StringStringOp;
import org.enso.table.data.column.storage.type.StorageType; import org.enso.table.data.column.storage.type.StorageType;
import org.enso.table.data.column.storage.type.TextType; import org.enso.table.data.column.storage.type.TextType;
import org.graalvm.polyglot.Context; import org.graalvm.polyglot.Context;
import org.slf4j.LoggerFactory;
/** A column storing strings. */ /** A column storing strings. */
public final class StringStorage extends SpecializedStorage<String> { public final class StringStorage extends SpecializedStorage<String> {
private static final Executor EXECUTOR = Executors.newSingleThreadExecutor();
private static final org.slf4j.Logger LOGGER = LoggerFactory.getLogger(StringStorage.class);
private final TextType type; private final TextType type;
private long _countLeadingTrailingWhitespace = -1; private long _countLeadingTrailingWhitespace = -1;
@ -31,6 +40,10 @@ public final class StringStorage extends SpecializedStorage<String> {
public StringStorage(String[] data, int size, TextType type) {
  super(data, size, buildOps());
  this.type = type;

  // Pre-compute the untrimmed-cell count on the shared executor so a later call can hit the cache.
  // NOTE(review): this hands `this` to another thread from inside the constructor, and the cache
  // field is written without synchronization - confirm that this publication is acceptable.
  CompletableFuture.runAsync(this::countLeadingTrailingWhitespace, EXECUTOR)
      .exceptionally(
          failure -> {
            // Without this handler a failure in the background task would vanish silently.
            LOGGER.warn("Background whitespace count failed for StringStorage", failure);
            return null;
          });
}
@Override @Override
@ -55,10 +68,12 @@ public final class StringStorage extends SpecializedStorage<String> {
*/ */
public Long countLeadingTrailingWhitespace() { public Long countLeadingTrailingWhitespace() {
if (_countLeadingTrailingWhitespace >= 0) { if (_countLeadingTrailingWhitespace >= 0) {
LOGGER.warn("Using memoized implementation for StringStorage");
return _countLeadingTrailingWhitespace; return _countLeadingTrailingWhitespace;
} }
_countLeadingTrailingWhitespace = CountUntrimmed.compute(this); _countLeadingTrailingWhitespace = CountUntrimmed.compute(this);
LOGGER.warn("Counted leading and trailing whitespace in the column " + this.size);
return _countLeadingTrailingWhitespace; return _countLeadingTrailingWhitespace;
} }